In my initial analysis of the freshwater-saltwater dataset, I had three vcf files: 1. one generated from all pairwise comparisons of populations, containing SNPs only found in all 16 populations, in 75% of individuals, and with a minor allele frequency of at least 5%. (“separate”) 2. one generated from all pairwise comparisons of populations, containing SNPs found in 4 populations, in 75% of individuals, and with a minor allele frequency of at least 5%. (“P4”) 3. one generated from comparing lumped ‘freshwater’ and ‘saltwater’ populations, containing SNPs found in 50% of individuals and with a minor allele frequency of at least 5%. (“lumped”)

I did the majority of the analyses using set #3, but would like to explore what changes if I use dataset #2. I think #1 is too restrictive. Dataset #2 is in the “subset” dataset, and is what I ran the structure analyses on. This is the dataset I’m going to move forward with for the paper.

Note that in most of these cases the actual analysis will be set to eval=FALSE once I’ve run it once, because then I save the output and only have to read it in, saving compilation time.

source("../../gwscaR/R/gwscaR.R")
source("../../gwscaR/R/gwscaR_plot.R")
source("../../gwscaR/R/gwscaR_utility.R")
source("../../gwscaR/R/gwscaR_fsts.R")
source("../../gwscaR/R/gwscaR_popgen.R")
source("../scripts/002_treemix_plotting_funcs.R")#I've modified these functions
library(knitr)
pop.list<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST","ALFW","FLSG","FLKB",
    "FLFD","FLSI","FLAB","FLPB","FLHB","FLCC","FLLG")
pop.labs<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST","ALFW","FLSG","FLKB",
            "FLFD","FLSI","FLAB","FLPB","FLHB","FLCC","FLFW")
fw.list<-c("TXFW","LAFW","ALFW","FLLG")
sw.list<-c("TXSP","TXCC","TXCB","ALST","FLSG","FLKB",
    "FLFD","FLSI","FLAB","FLPB","FLHB","FLCC")
lgs<-c("LG1","LG2","LG3","LG4","LG5","LG6","LG7","LG8","LG9","LG10","LG11",
    "LG12","LG13","LG14","LG15","LG16","LG17","LG18","LG19","LG20","LG21",
    "LG22")
lgn<-seq(1,22)
all.colors<-c(rep("black",2),"#2166ac","black","#2166ac","black","#2166ac",
        rep("black",8),"#2166ac")
grp.colors<-c('#762a83','#af8dc3','#e7d4e8','#d9f0d3','#7fbf7b','#1b7837')
pwise.fst.all<-read.table("stacks/fwsw_fst_summary.txt",header=T,row.names=1,sep='\t')
    #pwise.fst.all<-rbind(pwise.fst.all,rep(NA,ncol(pwise.fst.all)))
    rownames(pwise.fst.all)<-pop.labs
    colnames(pwise.fst.all)<-pop.labs
pwise.fst.sub<-read.table("stacks/fwsw_fst_summary_subset.txt",header=T,row.names=1,sep='\t')
  colnames(pwise.fst.sub)<-pop.labs
  rownames(pwise.fst.sub)<-pop.labs
ped.sub<-read.table("stacks/subset.ped",header=F)   
ped.sub$V1<-gsub("sample_(\\w{4})\\w+.*","\\1",ped.sub$V2)
map.sub<-read.table("stacks/subset.map",header = F,stringsAsFactors = F)
map.sub$Locus<-paste(gsub("(\\d+)_\\d+","\\1",map.sub$V2),(as.numeric(map.sub$V4)+1),sep=".")
colnames(ped.sub)<-c("Pop","IID","","","","Phenotype","","",map.sub$Locus)

This P4/subset dataset has 9820 SNPs from 9820 RAD loci, from 698 indivudals in 16 populations.

Generate a new vcf file

The first thing to do is to create a vcf file using the subset parameters. I’ve already got a whitelist of loci in the subsetted dataset, so I need to run populations -b 2 -W fwsw_results/subset.whitelist.txt -P fwsw_results/stacks -M fwsw_pops_map.txt --vcf, which I did on 2017-12-18 on silivren-lond. I then re-named it to p4.vcf (and the other output files).

vcf<-parse.vcf("p4.upd.vcf") #this is the smaller dataset
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
scaffs[1:22]<-lgs
scaff.starts<-tapply(vcf$POS,vcf$`#CHROM`,max)
scaff.starts<-data.frame(rbind(cbind(names(scaff.starts),scaff.starts)),stringsAsFactors = F)
locus.info<-c(colnames(vcf[1:9]),"SNP")

The vcf file contains 9638 SNPs from 9638 RAD loci.

Choose a subset of the SNPs to re-use. [Do I need to do this?]

chosen.snps<-choose.one.snp(vcf)$SNP
write.table(chosen.snps,"chosen.all.snps.txt",quote=F)
chosen.snps<-unlist(read.table("chosen.all.snps.txt"))

There are 2546 SNPs that I’ll use from the vcf file, from 9638 RAD loci.

Figure 1

The first figure in the paper is a map of the collection sites.

jpeg("all_sites_map.jpg", res=300, height=7,width=14, units="in")
pdf("all_sites_map.pdf",height=7,width=14)
par(oma=c(0,0,0,0),mar=c(0,0,0,0),pin=c(7,7))
map("worldHires", "usa",xlim=c(-100,-76), ylim=c(24,32), 
    col="gray90", mar=c(0,0,0,0),fill=TRUE, res=300,myborder=0)
map("worldHires", "mexico",xlim=c(-100,-76), ylim=c(24,32), 
    col="gray95", fill=TRUE, add=TRUE)
points(mar.coor$lon, mar.coor$lat,  col="black", cex=1.2, pch=19)
points(-1*fw.coor$lon, fw.coor$lat,  col="#2166ac", cex=1.5, pch=18)
abline(h=c(25,30,35),lty=3)
abline(v=c(-80,-85,-90,-95,-100),lty=3)
text(x=c(-99.5,-99.5),y=c(25,30),c("25N","30N"))
text(x=c(-80,-85,-90,-95),y=rep(31.8,4),c("80W","85W","90W","95W"))
text(y=26,x=-90,"Gulf of Mexico")
text(y=25.5,x=-98.5,"Mexico")
text(x=-91,y=31,"USA")
text(x=-78,y=29.5,"Atlantic Ocean")
text(x=-96.5,y=26,"TXSP",font=2)
text(x=-96.9,y=27.2,"TXCC",font=2)
text(x=-96,y=28.3,"TXFW",font=2,col="#2166ac")
text(x=-94.7,y=29,"TXCB",font=2)
text(x=-90.2,y=30.3,"LAFW",font=2,col="#2166ac")
text(x=-88,y=30,"ALST",font=2)
text(x=-87,y=30.75,"ALFW",font=2,col="#2166ac")
text(x=-85,y=29.4,"FLSG",font=2)
text(x=-83.5,y=29.2,"FLKB",font=2)
text(x=-83.2,y=27.6,"FLFD",font=2)
text(x=-82.2,y=26,"FLSI",font=2)
text(x=-80,y=24.8,"FLAB",font=2)
text(x=-79.5,y=26.8,"FLPB",font=2)
text(x=-79.7,y=27.2,"FLHB",font=2)
text(x=-80.2,y=28.2,"FLCC",font=2)
text(x=-80.9,y=29.3,"FLFW",font=2,col="#2166ac")
dev.off()
Figure 1. Map of collection sites

Figure 1. Map of collection sites

Figure 2

The second figure in the paper is showing population structure, using STRUCTURE, adegenet, and PCAdapt. These analyses were run as exactly written in fwsw_analysis.R, so I won’t reproduce that code here.

Figure 2. Population Structure

Figure 2. Population Structure

Figure 3

I don’t need to re-calculate pairwise Jost’s D, and Fsts using the P4 (or “subset”) dataset, so I can just read in those files. But I do need to run Treemix and PopTree2.

Treemix

To run treemix, I follow the following steps:

  1. Fit tree without migration
  2. Add migration edges using -m.
  3. Use f3 and f4 ancestry estimation to approximate the amount of admixture and compare to treemix.
  4. Use f4 statistics to understand poor fits.

All of these require setting a root, which is FLPB based on previous trees.

First, I need to create a file in the correct format, which uses the vcf file:

treemix.name<-"treemix/p4_treemix"
treemix.prefix<-"treemix/p4_"
poporder.file<-"treemix/poporder"
fst.tree.name<-as.character("ALLfst_cov_heatmap.png")
tm.fwsw<-treemix.from.vcf(vcf,pop.list)
write.table(tm.fwsw,treemix.name,col.names=TRUE,row.names=FALSE,quote=F,sep=' ')
#then in unix: gzip -c treemix.name > treemix.name.gz

Then, in unix, I need to run gzip -c treemix/p4_treemix > treemix/p4_treemix.gz. Now I can run scripts/run_treemix.sh, which implements steps 1 and 2, and which I need to run in Ubuntu. Note that there are a lot of “no counts” warnings from treemix. Also, that it runs very quickly

After that, I can evaluate the different outcomes.

setwd("treemix")
source("../../scripts/002_treemix_plotting_funcs.R")#I've modified these functions
poporder<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST",
            "ALFW","FLSG","FLKB","FLFD","FLSI","FLAB",
            "FLPB","FLHB","FLCC","FLLG")
colors<-poporder
colors[colors %in% "FLLG"]<-grp.colors[6]
colors[colors %in% c("FLPB","FLHB","FLCC")]<-grp.colors[6]
colors[colors %in% c("FLAB")]<-grp.colors[5]
colors[colors %in% c("FLSI","FLFD","FLKB","FLSG")]<-grp.colors[3]
colors[colors %in% c("ALST","ALFW","LAFW")]<-grp.colors[2]
colors[colors %in% c("TXSP","TXCC","TXFW","TXCB")]<-grp.colors[1]
write.table(cbind(poporder,colors),"poporder",quote=F,sep='\t')
setwd("../")
m0<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBr",sep=""),poporder,split=c(1,1,3,2),more=TRUE)
m1<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm1",sep=""),poporder,split=c(2,1,3,2),more=TRUE)
m2<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm2",sep=""),poporder,split=c(3,1,3,2),more=TRUE)
m3<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm3",sep=""),poporder,split=c(1,2,3,2),more=TRUE)
m4<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm4",sep=""),poporder,split=c(2,2,3,2),more=TRUE)
m5<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm5",sep=""),poporder,split=c(3,2,3,2),more=FALSE)

par(mfrow=c(2,3))
r0<-plot_resid(paste(treemix.prefix,"k100bFLPBr",sep=""),poporder.file)
r1<-plot_resid(paste(treemix.prefix,"k100bFLPBrm1",sep=""),poporder.file)
r2<-plot_resid(paste(treemix.prefix,"k100bFLPBrm2",sep=""),poporder.file)
r3<-plot_resid(paste(treemix.prefix,"k100bFLPBrm3",sep=""),poporder.file)
r4<-plot_resid(paste(treemix.prefix,"k100bFLPBrm4",sep=""),poporder.file)
r5<-plot_resid(paste(treemix.prefix,"k100bFLPBrm5",sep=""),poporder.file)

Population pairs that are ‘too far apart’ on the tree (have high error estimates) are ones that are likely candidates for gene flow - and these are the squares with dark greens, blues, and black. These are LAFW-TXFW and ALFW-TXFW in almost all of the SE graphs

par(mfrow=c(2,3),mar=c(1,1,1,1),oma=c(1,1,1,1))
t0<-plot_tree(paste(treemix.prefix,"k100bFLPBr",sep=""),plotmig = F,plus=0.05,scale=F,mbar=F)
t1<-plot_tree(paste(treemix.prefix,"k100bFLPBrm1",sep=""),plus=0.05,scale=F,mbar=F)
t2<-plot_tree(paste(treemix.prefix,"k100bFLPBrm2",sep=""),plus=0.05,scale=F,mbar=F)
t3<-plot_tree(paste(treemix.prefix,"k100bFLPBrm3",sep=""),plus=0.05,scale=F,mbar=F)
t4<-plot_tree(paste(treemix.prefix,"k100bFLPBrm4",sep=""),plus=0.05,scale=F,mbar=F)
t5<-plot_tree(paste(treemix.prefix,"k100bFLPBrm5",sep=""),plus=0.05,scale=F,mbar=F)

Look at the p-values: do migration events always improve the fit of the data?

tree0<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBr.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "")
tree1<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm1.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree2<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm2.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree3<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm3.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree4<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm4.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree5<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm5.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)

tree1[,4]
## [1] 9.18188e-12
tree2[,4]
## [1] "6.61839e-05"   "<2.22507e-308"
tree3[,4]
## [1] "1.02604e-05"   "<2.22507e-308" "1.88599e-10"
tree4[,4]
## [1] "<2.22507e-308" "<2.22507e-308" "<2.22507e-308" "2.89325e-05"
tree5[,4]
## [1] "6.55032e-15"   "4.94959e-09"   "<2.22507e-308" "0.00690872"   
## [5] "<2.22507e-308"

All of the added migration edges improve the fit. Evaluating both the residual and migration edge plots, we can see that four migration edges reduces the error between LAFW & TXFW and ALFW & TXFW. The largest SE is between FLFW (FLLG) and itself, and secondarily between LAFW and itself. This means that those branches are surprisingly long, which I am comfortable accepting. The strongest migration edge, between the branch from the Atlantic Florida to Gulf Florida populations -> FLAB, is unsurprising because that is an intermediate location between the Atlantic and Gulf coasts. In some of the structure analyses it clusters with the Gulf coast and in others it is its own admixed group - supporting the tree structure here.

The other migration events are somewhat more surprising. We will evaluate those using the three and four population analyses.

Evaluating migration edges

First, let’s read in the three and four population analysis results. These tested all possible three- and four-population tree groups to see whether they pass a test of ‘treeness’. If not, their p-value will be small, and they don’t form a nice tree.

f3.name<-"treemix/p4_threepop.txt"
f4.name<-"treemix/p4_fourpop.txt"

#extract the relevant lines
extract.fs<-function(filename,pat="^[A-Z]{4};"){
  f<-readLines(filename)
  fmatch<-f[grep(pat,f)]
  f<-read.table(text=fmatch)
  colnames(f)<-c("pops","f","SE","Z")
  return(f)
}
f3<-extract.fs(f3.name)
f4<-extract.fs(f4.name,pat="^[A-Z]{4},")

#add p-values
f3$p<-pnorm(f3$Z)
f4$p<-pnorm(f4$Z)

Now we can investigate the migration edges added to the trees.

Atlantic/Gulf->FLAB

This least surprising migration edge would be expected to show a signature of admixture. I’ll check the treeness of FLHB;FLSI,FLAB and FLHB,FLCC;FLSI,FLAB. For comparison, I can look at FLHB;FLSI,FLFD and FLHB,FLCC;FLSI,FLFD

checks3<-c("FLHB;FLSI,FLAB","FLHB;FLFD,FLSI")
f3[f3$pops %in% checks3,]
##                pops         f          SE       Z p
## 1583 FLHB;FLFD,FLSI 0.0250745 0.000545858 45.9359 1
## 1625 FLHB;FLSI,FLAB 0.0224990 0.000508579 44.2390 1
checks4<-c("FLSI,FLAB;FLHB,FLCC","FLFD,FLSI;FLHB,FLCC")
f4[f4$pops %in% checks4,]
##                     pops            f          SE        Z            p
## 5379 FLFD,FLSI;FLHB,FLCC -0.000159989 6.74083e-05 -2.37343 8.811867e-03
## 5425 FLSI,FLAB;FLHB,FLCC -0.000470815 8.68360e-05 -5.42188 2.948773e-08

They pass the three-population tests but not the four-population tests.

TXFW->TXCB

To investigate this migration edge, I need to evaluate whether a [TXFW[TXCB,TXCC]] or [TXFW[TXCB,TXSP]] topology is an appropriate tree.

checks<-c("TXFW;TXCC,TXCB",
          "TXFW;TXSP,TXCB")
f3[f3$pops %in% checks,]
##               pops         f         SE       Z p
## 44  TXFW;TXSP,TXCB 0.0294602 0.00227983 12.9221 1
## 318 TXFW;TXCC,TXCB 0.0304362 0.00230450 13.2073 1

Neither of these fail the treeness test, so TXFW is essentially functioning as an effective outgroup/more ancestral population. Let’s check if a four population tree also makes sense.

f4[f4$pops %in% "TXSP,TXCC;TXFW,TXCB",]
##                  pops           f          SE       Z p
## 2 TXSP,TXCC;TXFW,TXCB 0.000976003 0.000160092 6.09653 1

This also passes the treeness test - so this isn’t an actual admixture event, more of demonstrating shared ancestry.

TXFW->LAFW/ALFW

This migration edge would suggest a three-population structure of [TXFW[ALFW,LAFW]] or a four-population structure of [TXFW,TXCB[LAFW,ALFW]] or [TXFW,ALST[LAFW,ALFW]]

checks<-c("TXFW,TXCB;LAFW,ALFW",
          "TXFW,ALST;LAFW,ALFW")#to get the correct order of populations I had to
                                #use grep and manually try a few different ones
f4[f4$pops %in% checks,]
##                     pops           f          SE        Z            p
## 2463 TXFW,TXCB;LAFW,ALFW -0.00140534 0.000542958 -2.58830 4.822547e-03
## 2657 TXFW,ALST;LAFW,ALFW -0.00416643 0.000526403 -7.91491 1.237160e-15
f3[f3$pops == "TXFW;LAFW,ALFW",]
##               pops         f         SE       Z p
## 630 TXFW;LAFW,ALFW 0.0349272 0.00105283 33.1745 1

The three-population test does not fail the treeness test but the four-population tests do fail the treeness tests. This suggests that TXFW functions as an outgroup to the LAFW and ALFW - they are perhaps derived from the TXFW population rather than experiencing current admixture.

FLFW->TX/AL

FLLG is actually FLFW, and there’s a migration edge from it to the Alabama/Louisiana clade and the Texas clade. So, we can test FLLG;ALST,TXFW, FLLG;ALFW,TXCB, FLLG;ALFW,TXFW or FLLG;ALST,TXCC - see if the migration is specifically to freshwater populations or not.

checks<-c("FLLG;TXFW,ALST","FLLG;TXCB,ALFW","FLLG;TXCB,LAFW",#mix of freshwater
          "FLLG;TXFW,ALFW","FLLG;TXFW,LAFW",                 #all freshwater
          "FLLG;TXCC,ALST")                                  #no freshwater
f3[f3$pops %in% checks,]
##               pops         f         SE       Z p
## 452 FLLG;TXCC,ALST 0.0843051 0.00214158 39.3658 1
## 655 FLLG;TXFW,LAFW 0.0897111 0.00256943 34.9147 1
## 686 FLLG;TXFW,ALST 0.0833365 0.00215944 38.5917 1
## 713 FLLG;TXFW,ALFW 0.0928283 0.00279453 33.2179 1
## 853 FLLG;TXCB,LAFW 0.0857054 0.00220127 38.9345 1
## 911 FLLG;TXCB,ALFW 0.0874172 0.00219662 39.7963 1

These all pass the treeness test.

Plot the chosen tree

treemix.file<-as.character("treemix/p4_k100bFLPBr.cov.gz")
tm.vertices<-"treemix/p4_k100bFLPBrm4.vertices.gz"
tm.plot<-"treemix/p4_treemix_m4_FLPB.png"
tm.tree<-"treemix/p4_k100bFLPBrm4"

PopTree2

For the PopTree2 analysis, I need to convert the vcf file to genepop format. I did this using PGDSpider2. Then I ran PopTree2 on Windows10, using Da to calculate neighbor-joining trees and using 1000 bootstrap replicates.

Or, if that doesn’t work,

remove.missing.data<-function(vcf, pop.list){
  exclude<-NULL
  for(i in 1:nrow(vcf))
  {
    vcf.row<-vcf[i,colnames(vcf) != "SNP"]#remove this if it exists
    missingness<-unlist(lapply(pop.list,function(pop){
      pop.vcf<-vcf.row[,grep(pop,colnames(vcf.row))]
      missing<-length(grep("\\.\\/\\.",pop.vcf))
      prop.missing<-missing/length(pop.vcf)
      return(prop.missing)
    }))
    if(length(missingness[missingness==1])>0){
      print(paste("Row ", i, " is has no data for pop ", pop.list[which(missingness==1)]))
      exclude<-c(exclude,i)
    } 
  }
  if(!is.null(exclude)){
    return(vcf[-exclude,])
  }else{
    return(vcf)
  }
}
gpop.name<-"poptree/p4.genepop"
sub.prefix<-"poptree/p4_"
vcf<-remove.missing.data(vcf, pop.list)
## [1] "Row  9  is has no data for pop  LAFW"
## [1] "Row  17  is has no data for pop  FLCC"
## [2] "Row  17  is has no data for pop  FLLG"
## [1] "Row  26  is has no data for pop  LAFW"
## [1] "Row  30  is has no data for pop  LAFW"
## [1] "Row  46  is has no data for pop  FLLG"
## [1] "Row  63  is has no data for pop  LAFW"
## [1] "Row  68  is has no data for pop  ALFW"
## [1] "Row  107  is has no data for pop  FLLG"
## [1] "Row  118  is has no data for pop  LAFW"
## [1] "Row  127  is has no data for pop  FLCC"
## [2] "Row  127  is has no data for pop  FLLG"
## [1] "Row  132  is has no data for pop  LAFW"
## [1] "Row  175  is has no data for pop  LAFW"
## [1] "Row  182  is has no data for pop  LAFW"
## [1] "Row  214  is has no data for pop  FLLG"
## [1] "Row  311  is has no data for pop  LAFW"
## [2] "Row  311  is has no data for pop  FLLG"
## [1] "Row  322  is has no data for pop  LAFW"
## [1] "Row  334  is has no data for pop  FLLG"
## [1] "Row  357  is has no data for pop  FLLG"
## [1] "Row  394  is has no data for pop  LAFW"
## [2] "Row  394  is has no data for pop  FLLG"
## [1] "Row  414  is has no data for pop  LAFW"
## [1] "Row  448  is has no data for pop  FLLG"
## [1] "Row  484  is has no data for pop  TXFW"
## [1] "Row  499  is has no data for pop  LAFW"
## [1] "Row  520  is has no data for pop  LAFW"
## [1] "Row  531  is has no data for pop  TXFW"
## [1] "Row  543  is has no data for pop  LAFW"
## [1] "Row  545  is has no data for pop  LAFW"
## [1] "Row  551  is has no data for pop  FLLG"
## [1] "Row  592  is has no data for pop  FLLG"
## [1] "Row  599  is has no data for pop  FLLG"
## [1] "Row  609  is has no data for pop  FLLG"
## [1] "Row  616  is has no data for pop  FLLG"
## [1] "Row  655  is has no data for pop  LAFW"
## [1] "Row  694  is has no data for pop  FLLG"
## [1] "Row  698  is has no data for pop  TXFW"
## [1] "Row  709  is has no data for pop  FLLG"
## [1] "Row  731  is has no data for pop  LAFW"
## [1] "Row  758  is has no data for pop  FLCC"
## [1] "Row  775  is has no data for pop  FLLG"
## [1] "Row  777  is has no data for pop  LAFW"
## [1] "Row  783  is has no data for pop  FLLG"
## [1] "Row  810  is has no data for pop  FLLG"
## [1] "Row  811  is has no data for pop  LAFW"
## [2] "Row  811  is has no data for pop  FLLG"
## [1] "Row  829  is has no data for pop  LAFW"
## [1] "Row  832  is has no data for pop  FLLG"
## [1] "Row  851  is has no data for pop  FLLG"
## [1] "Row  854  is has no data for pop  LAFW"
## [2] "Row  854  is has no data for pop  FLLG"
## [1] "Row  865  is has no data for pop  FLLG"
## [1] "Row  874  is has no data for pop  LAFW"
## [1] "Row  881  is has no data for pop  TXFW"
## [1] "Row  933  is has no data for pop  LAFW"
## [2] "Row  933  is has no data for pop  ALFW"
## [1] "Row  937  is has no data for pop  FLLG"
## [1] "Row  953  is has no data for pop  FLLG"
## [1] "Row  954  is has no data for pop  LAFW"
## [1] "Row  1009  is has no data for pop  FLLG"
## [1] "Row  1012  is has no data for pop  FLLG"
## [1] "Row  1061  is has no data for pop  FLLG"
## [1] "Row  1086  is has no data for pop  FLLG"
## [1] "Row  1091  is has no data for pop  FLLG"
## [1] "Row  1094  is has no data for pop  FLLG"
## [1] "Row  1145  is has no data for pop  LAFW"
## [1] "Row  1168  is has no data for pop  LAFW"
## [1] "Row  1195  is has no data for pop  LAFW"
## [1] "Row  1198  is has no data for pop  LAFW"
## [1] "Row  1203  is has no data for pop  FLLG"
## [1] "Row  1223  is has no data for pop  LAFW"
## [2] "Row  1223  is has no data for pop  FLLG"
## [1] "Row  1236  is has no data for pop  FLLG"
## [1] "Row  1252  is has no data for pop  LAFW"
## [1] "Row  1257  is has no data for pop  FLLG"
## [1] "Row  1278  is has no data for pop  FLLG"
## [1] "Row  1301  is has no data for pop  LAFW"
## [1] "Row  1304  is has no data for pop  LAFW"
## [1] "Row  1307  is has no data for pop  FLLG"
## [1] "Row  1321  is has no data for pop  LAFW"
## [2] "Row  1321  is has no data for pop  FLLG"
## [1] "Row  1342  is has no data for pop  LAFW"
## [1] "Row  1350  is has no data for pop  FLLG"
## [1] "Row  1351  is has no data for pop  LAFW"
## [2] "Row  1351  is has no data for pop  FLLG"
## [1] "Row  1363  is has no data for pop  LAFW"
## [1] "Row  1365  is has no data for pop  LAFW"
## [1] "Row  1373  is has no data for pop  LAFW"
## [1] "Row  1414  is has no data for pop  LAFW"
## [1] "Row  1453  is has no data for pop  LAFW"
## [1] "Row  1488  is has no data for pop  LAFW"
## [1] "Row  1518  is has no data for pop  LAFW"
## [1] "Row  1540  is has no data for pop  FLLG"
## [1] "Row  1557  is has no data for pop  FLLG"
## [1] "Row  1575  is has no data for pop  FLLG"
## [1] "Row  1595  is has no data for pop  FLLG"
## [1] "Row  1648  is has no data for pop  TXFW"
## [2] "Row  1648  is has no data for pop  FLLG"
## [1] "Row  1669  is has no data for pop  FLLG"
## [1] "Row  1683  is has no data for pop  LAFW"
## [1] "Row  1721  is has no data for pop  LAFW"
## [1] "Row  1729  is has no data for pop  ALFW"
## [1] "Row  1734  is has no data for pop  FLLG"
## [1] "Row  1737  is has no data for pop  FLLG"
## [1] "Row  1752  is has no data for pop  LAFW"
## [1] "Row  1781  is has no data for pop  LAFW"
## [1] "Row  1783  is has no data for pop  LAFW"
## [1] "Row  1785  is has no data for pop  FLLG"
## [1] "Row  1793  is has no data for pop  LAFW"
## [1] "Row  1805  is has no data for pop  LAFW"
## [1] "Row  1819  is has no data for pop  LAFW"
## [2] "Row  1819  is has no data for pop  FLLG"
## [1] "Row  1838  is has no data for pop  FLLG"
## [1] "Row  1844  is has no data for pop  TXFW"
## [1] "Row  1848  is has no data for pop  TXFW"
## [1] "Row  1852  is has no data for pop  TXFW"
## [1] "Row  1859  is has no data for pop  FLLG"
## [1] "Row  1860  is has no data for pop  FLLG"
## [1] "Row  1863  is has no data for pop  FLLG"
## [1] "Row  1871  is has no data for pop  LAFW"
## [1] "Row  1880  is has no data for pop  FLCC"
## [2] "Row  1880  is has no data for pop  FLLG"
## [1] "Row  1883  is has no data for pop  FLLG"
## [1] "Row  1919  is has no data for pop  TXFW"
## [1] "Row  1951  is has no data for pop  FLLG"
## [1] "Row  1954  is has no data for pop  FLLG"
## [1] "Row  1989  is has no data for pop  FLLG"
## [1] "Row  1998  is has no data for pop  TXFW"
## [2] "Row  1998  is has no data for pop  TXCB"
## [1] "Row  2006  is has no data for pop  FLLG"
## [1] "Row  2017  is has no data for pop  FLLG"
## [1] "Row  2066  is has no data for pop  FLLG"
## [1] "Row  2068  is has no data for pop  FLLG"
## [1] "Row  2071  is has no data for pop  LAFW"
## [1] "Row  2112  is has no data for pop  FLLG"
## [1] "Row  2147  is has no data for pop  FLLG"
## [1] "Row  2170  is has no data for pop  LAFW"
## [1] "Row  2214  is has no data for pop  FLLG"
## [1] "Row  2220  is has no data for pop  FLLG"
## [1] "Row  2282  is has no data for pop  FLLG"
## [1] "Row  2289  is has no data for pop  FLLG"
## [1] "Row  2291  is has no data for pop  LAFW"
## [1] "Row  2304  is has no data for pop  FLLG"
## [1] "Row  2326  is has no data for pop  FLLG"
## [1] "Row  2328  is has no data for pop  LAFW"
## [1] "Row  2333  is has no data for pop  FLLG"
## [1] "Row  2340  is has no data for pop  LAFW"
## [1] "Row  2364  is has no data for pop  FLLG"
## [1] "Row  2373  is has no data for pop  FLCC"
## [2] "Row  2373  is has no data for pop  FLLG"
## [1] "Row  2404  is has no data for pop  FLLG"
## [1] "Row  2411  is has no data for pop  FLLG"
## [1] "Row  2424  is has no data for pop  FLLG"
## [1] "Row  2438  is has no data for pop  FLLG"
## [1] "Row  2445  is has no data for pop  FLLG"
## [1] "Row  2448  is has no data for pop  FLLG"
## [1] "Row  2456  is has no data for pop  FLLG"
## [1] "Row  2475  is has no data for pop  FLLG"
## [1] "Row  2486  is has no data for pop  FLLG"
## [1] "Row  2546  is has no data for pop  FLLG"
## [1] "Row  2555  is has no data for pop  FLLG"
## [1] "Row  2578  is has no data for pop  LAFW"
## [2] "Row  2578  is has no data for pop  FLLG"
## [1] "Row  2587  is has no data for pop  FLLG"
## [1] "Row  2615  is has no data for pop  LAFW"
## [1] "Row  2634  is has no data for pop  FLLG"
## [1] "Row  2642  is has no data for pop  LAFW"
## [1] "Row  2659  is has no data for pop  FLLG"
## [1] "Row  2674  is has no data for pop  FLLG"
## [1] "Row  2720  is has no data for pop  LAFW"
## [1] "Row  2729  is has no data for pop  LAFW"
## [2] "Row  2729  is has no data for pop  FLLG"
## [1] "Row  2748  is has no data for pop  FLLG"
## [1] "Row  2815  is has no data for pop  TXFW"
## [1] "Row  2823  is has no data for pop  FLLG"
## [1] "Row  2825  is has no data for pop  FLLG"
## [1] "Row  2831  is has no data for pop  FLLG"
## [1] "Row  2840  is has no data for pop  FLLG"
## [1] "Row  2854  is has no data for pop  LAFW"
## [1] "Row  2917  is has no data for pop  LAFW"
## [1] "Row  2919  is has no data for pop  LAFW"
## [1] "Row  2961  is has no data for pop  LAFW"
## [1] "Row  2969  is has no data for pop  FLPB"
## [2] "Row  2969  is has no data for pop  FLHB"
## [3] "Row  2969  is has no data for pop  FLCC"
## [4] "Row  2969  is has no data for pop  FLLG"
## [1] "Row  2977  is has no data for pop  LAFW"
## [1] "Row  2979  is has no data for pop  FLLG"
## [1] "Row  2981  is has no data for pop  LAFW"
## [1] "Row  2985  is has no data for pop  LAFW"
## [1] "Row  2991  is has no data for pop  FLLG"
## [1] "Row  2995  is has no data for pop  LAFW"
## [1] "Row  3003  is has no data for pop  LAFW"
## [1] "Row  3013  is has no data for pop  LAFW"
## [1] "Row  3026  is has no data for pop  LAFW"
## [2] "Row  3026  is has no data for pop  FLLG"
## [1] "Row  3095  is has no data for pop  LAFW"
## [1] "Row  3128  is has no data for pop  FLLG"
## [1] "Row  3158  is has no data for pop  FLCC"
## [2] "Row  3158  is has no data for pop  FLLG"
## [1] "Row  3172  is has no data for pop  FLLG"
## [1] "Row  3177  is has no data for pop  LAFW"
## [1] "Row  3181  is has no data for pop  FLLG"
## [1] "Row  3203  is has no data for pop  LAFW"
## [1] "Row  3208  is has no data for pop  LAFW"
## [2] "Row  3208  is has no data for pop  FLLG"
## [1] "Row  3254  is has no data for pop  LAFW"
## [1] "Row  3256  is has no data for pop  LAFW"
## [1] "Row  3259  is has no data for pop  FLLG"
## [1] "Row  3260  is has no data for pop  FLLG"
## [1] "Row  3294  is has no data for pop  FLLG"
## [1] "Row  3305  is has no data for pop  LAFW"
## [1] "Row  3306  is has no data for pop  LAFW"
## [1] "Row  3311  is has no data for pop  TXFW"
## [1] "Row  3373  is has no data for pop  LAFW"
## [1] "Row  3379  is has no data for pop  LAFW"
## [1] "Row  3393  is has no data for pop  FLLG"
## [1] "Row  3394  is has no data for pop  FLLG"
## [1] "Row  3401  is has no data for pop  FLLG"
## [1] "Row  3408  is has no data for pop  LAFW"
## [1] "Row  3416  is has no data for pop  LAFW"
## [1] "Row  3462  is has no data for pop  LAFW"
## [1] "Row  3467  is has no data for pop  FLLG"
## [1] "Row  3470  is has no data for pop  FLLG"
## [1] "Row  3478  is has no data for pop  FLLG"
## [1] "Row  3484  is has no data for pop  FLLG"
## [1] "Row  3510  is has no data for pop  LAFW"
## [1] "Row  3511  is has no data for pop  FLLG"
## [1] "Row  3520  is has no data for pop  FLLG"
## [1] "Row  3522  is has no data for pop  LAFW"
## [1] "Row  3527  is has no data for pop  LAFW"
## [1] "Row  3534  is has no data for pop  FLLG"
## [1] "Row  3544  is has no data for pop  LAFW"
## [1] "Row  3564  is has no data for pop  LAFW"
## [2] "Row  3564  is has no data for pop  FLLG"
## [1] "Row  3570  is has no data for pop  FLLG"
## [1] "Row  3574  is has no data for pop  FLLG"
## [1] "Row  3575  is has no data for pop  FLLG"
## [1] "Row  3580  is has no data for pop  FLLG"
## [1] "Row  3583  is has no data for pop  LAFW"
## [1] "Row  3620  is has no data for pop  FLLG"
## [1] "Row  3688  is has no data for pop  FLLG"
## [1] "Row  3696  is has no data for pop  FLLG"
## [1] "Row  3703  is has no data for pop  FLLG"
## [1] "Row  3710  is has no data for pop  LAFW"
## [1] "Row  3730  is has no data for pop  TXSP"
## [2] "Row  3730  is has no data for pop  TXCC"
## [3] "Row  3730  is has no data for pop  TXFW"
## [4] "Row  3730  is has no data for pop  TXCB"
## [1] "Row  3741  is has no data for pop  LAFW"
## [1] "Row  3754  is has no data for pop  LAFW"
## [1] "Row  3783  is has no data for pop  FLLG"
## [1] "Row  3790  is has no data for pop  FLLG"
## [1] "Row  3828  is has no data for pop  FLLG"
## [1] "Row  3871  is has no data for pop  LAFW"
## [1] "Row  3956  is has no data for pop  LAFW"
## [1] "Row  3959  is has no data for pop  FLLG"
## [1] "Row  3976  is has no data for pop  FLLG"
## [1] "Row  4010  is has no data for pop  LAFW"
## [1] "Row  4041  is has no data for pop  LAFW"
## [1] "Row  4066  is has no data for pop  LAFW"
## [2] "Row  4066  is has no data for pop  FLLG"
## [1] "Row  4080  is has no data for pop  FLLG"
## [1] "Row  4085  is has no data for pop  FLLG"
## [1] "Row  4155  is has no data for pop  TXFW"
## [1] "Row  4168  is has no data for pop  FLLG"
## [1] "Row  4175  is has no data for pop  FLLG"
## [1] "Row  4202  is has no data for pop  FLLG"
## [1] "Row  4215  is has no data for pop  FLLG"
## [1] "Row  4230  is has no data for pop  LAFW"
## [2] "Row  4230  is has no data for pop  FLLG"
## [1] "Row  4234  is has no data for pop  FLLG"
## [1] "Row  4245  is has no data for pop  FLLG"
## [1] "Row  4282  is has no data for pop  FLLG"
## [1] "Row  4314  is has no data for pop  LAFW"
## [1] "Row  4316  is has no data for pop  LAFW"
## [1] "Row  4373  is has no data for pop  LAFW"
## [1] "Row  4393  is has no data for pop  LAFW"
## [1] "Row  4422  is has no data for pop  FLLG"
## [1] "Row  4471  is has no data for pop  FLLG"
## [1] "Row  4477  is has no data for pop  FLLG"
## [1] "Row  4479  is has no data for pop  LAFW"
## [1] "Row  4517  is has no data for pop  LAFW"
## [1] "Row  4529  is has no data for pop  LAFW"
## [1] "Row  4533  is has no data for pop  FLLG"
## [1] "Row  4551  is has no data for pop  FLLG"
## [1] "Row  4573  is has no data for pop  LAFW"
## [1] "Row  4604  is has no data for pop  FLLG"
## [1] "Row  4616  is has no data for pop  LAFW"
## [1] "Row  4631  is has no data for pop  LAFW"
## [1] "Row  4638  is has no data for pop  LAFW"
## [1] "Row  4673  is has no data for pop  LAFW"
## [1] "Row  4688  is has no data for pop  LAFW"
## [1] "Row  4690  is has no data for pop  TXFW"
## [1] "Row  4700  is has no data for pop  FLLG"
## [1] "Row  4710  is has no data for pop  LAFW"
## [1] "Row  4730  is has no data for pop  FLCC"
## [2] "Row  4730  is has no data for pop  FLLG"
## [1] "Row  4738  is has no data for pop  FLLG"
## [1] "Row  4741  is has no data for pop  FLLG"
## [1] "Row  4763  is has no data for pop  LAFW"
## [2] "Row  4763  is has no data for pop  FLLG"
## [1] "Row  4779  is has no data for pop  LAFW"
## [2] "Row  4779  is has no data for pop  FLLG"
## [1] "Row  4781  is has no data for pop  LAFW"
## [1] "Row  4789  is has no data for pop  FLLG"
## [1] "Row  4808  is has no data for pop  FLCC"
## [2] "Row  4808  is has no data for pop  FLLG"
## [1] "Row  4826  is has no data for pop  FLLG"
## [1] "Row  4834  is has no data for pop  LAFW"
## [1] "Row  4862  is has no data for pop  LAFW"
## [1] "Row  4898  is has no data for pop  FLLG"
## [1] "Row  4921  is has no data for pop  ALFW"
## [1] "Row  4967  is has no data for pop  FLLG"
## [1] "Row  4997  is has no data for pop  FLLG"
## [1] "Row  5003  is has no data for pop  FLLG"
## [1] "Row  5006  is has no data for pop  FLLG"
## [1] "Row  5033  is has no data for pop  FLLG"
## [1] "Row  5089  is has no data for pop  ALFW"
## [1] "Row  5098  is has no data for pop  FLLG"
## [1] "Row  5117  is has no data for pop  FLLG"
## [1] "Row  5120  is has no data for pop  LAFW"
## [1] "Row  5125  is has no data for pop  FLLG"
## [1] "Row  5148  is has no data for pop  LAFW"
## [1] "Row  5153  is has no data for pop  LAFW"
## [1] "Row  5179  is has no data for pop  LAFW"
## [2] "Row  5179  is has no data for pop  FLLG"
## [1] "Row  5209  is has no data for pop  FLLG"
## [1] "Row  5295  is has no data for pop  FLLG"
## [1] "Row  5350  is has no data for pop  LAFW"
## [1] "Row  5355  is has no data for pop  FLLG"
## [1] "Row  5393  is has no data for pop  LAFW"
## [1] "Row  5402  is has no data for pop  FLLG"
## [1] "Row  5444  is has no data for pop  LAFW"
## [2] "Row  5444  is has no data for pop  FLLG"
## [1] "Row  5451  is has no data for pop  LAFW"
## [1] "Row  5462  is has no data for pop  LAFW"
## [1] "Row  5471  is has no data for pop  FLLG"
## [1] "Row  5478  is has no data for pop  FLLG"
## [1] "Row  5490  is has no data for pop  LAFW"
## [2] "Row  5490  is has no data for pop  FLLG"
## [1] "Row  5513  is has no data for pop  LAFW"
## [1] "Row  5522  is has no data for pop  FLLG"
## [1] "Row  5543  is has no data for pop  LAFW"
## [1] "Row  5573  is has no data for pop  ALFW"
## [1] "Row  5579  is has no data for pop  LAFW"
## [1] "Row  5583  is has no data for pop  LAFW"
## [1] "Row  5585  is has no data for pop  ALFW"
## [1] "Row  5587  is has no data for pop  FLHB"
## [2] "Row  5587  is has no data for pop  FLCC"
## [3] "Row  5587  is has no data for pop  FLLG"
## [1] "Row  5590  is has no data for pop  TXSP"
## [2] "Row  5590  is has no data for pop  TXCC"
## [3] "Row  5590  is has no data for pop  TXFW"
## [4] "Row  5590  is has no data for pop  TXCB"
## [5] "Row  5590  is has no data for pop  LAFW"
## [6] "Row  5590  is has no data for pop  ALFW"
## [1] "Row  5608  is has no data for pop  TXFW"
## [1] "Row  5611  is has no data for pop  LAFW"
## [1] "Row  5618  is has no data for pop  LAFW"
## [1] "Row  5636  is has no data for pop  LAFW"
## [1] "Row  5640  is has no data for pop  LAFW"
## [2] "Row  5640  is has no data for pop  FLLG"
## [1] "Row  5650  is has no data for pop  FLLG"
## [1] "Row  5705  is has no data for pop  LAFW"
## [1] "Row  5715  is has no data for pop  FLLG"
## [1] "Row  5737  is has no data for pop  LAFW"
## [1] "Row  5745  is has no data for pop  LAFW"
## [1] "Row  5762  is has no data for pop  FLLG"
## [1] "Row  5766  is has no data for pop  LAFW"
## [1] "Row  5797  is has no data for pop  LAFW"
## [2] "Row  5797  is has no data for pop  FLLG"
## [1] "Row  5814  is has no data for pop  FLLG"
## [1] "Row  5855  is has no data for pop  FLLG"
## [1] "Row  5874  is has no data for pop  LAFW"
## [1] "Row  5875  is has no data for pop  LAFW"
## [1] "Row  5886  is has no data for pop  LAFW"
## [1] "Row  5899  is has no data for pop  FLLG"
## [1] "Row  5901  is has no data for pop  FLCC"
## [2] "Row  5901  is has no data for pop  FLLG"
## [1] "Row  5904  is has no data for pop  LAFW"
## [1] "Row  5926  is has no data for pop  LAFW"
## [1] "Row  5935  is has no data for pop  LAFW"
## [2] "Row  5935  is has no data for pop  FLLG"
## [1] "Row  5942  is has no data for pop  LAFW"
## [1] "Row  5947  is has no data for pop  LAFW"
## [1] "Row  6050  is has no data for pop  LAFW"
## [2] "Row  6050  is has no data for pop  FLLG"
## [1] "Row  6066  is has no data for pop  LAFW"
## [1] "Row  6084  is has no data for pop  LAFW"
## [1] "Row  6128  is has no data for pop  TXFW"
## [1] "Row  6171  is has no data for pop  FLHB"
## [1] "Row  6276  is has no data for pop  LAFW"
## [1] "Row  6280  is has no data for pop  FLLG"
## [1] "Row  6327  is has no data for pop  FLLG"
## [1] "Row  6336  is has no data for pop  FLLG"
## [1] "Row  6337  is has no data for pop  LAFW"
## [1] "Row  6342  is has no data for pop  LAFW"
## [1] "Row  6357  is has no data for pop  FLLG"
## [1] "Row  6379  is has no data for pop  FLLG"
## [1] "Row  6395  is has no data for pop  FLLG"
## [1] "Row  6424  is has no data for pop  FLLG"
## [1] "Row  6428  is has no data for pop  FLLG"
## [1] "Row  6436  is has no data for pop  LAFW"
## [1] "Row  6543  is has no data for pop  LAFW"
## [2] "Row  6543  is has no data for pop  FLLG"
## [1] "Row  6553  is has no data for pop  LAFW"
## [1] "Row  6584  is has no data for pop  LAFW"
## [1] "Row  6627  is has no data for pop  LAFW"
## [1] "Row  6691  is has no data for pop  LAFW"
## [1] "Row  6703  is has no data for pop  LAFW"
## [2] "Row  6703  is has no data for pop  FLLG"
## [1] "Row  6736  is has no data for pop  LAFW"
## [1] "Row  6784  is has no data for pop  LAFW"
## [1] "Row  6785  is has no data for pop  FLLG"
## [1] "Row  6792  is has no data for pop  FLLG"
## [1] "Row  6795  is has no data for pop  FLLG"
## [1] "Row  6801  is has no data for pop  LAFW"
## [1] "Row  6803  is has no data for pop  LAFW"
## [1] "Row  6823  is has no data for pop  FLLG"
## [1] "Row  6826  is has no data for pop  LAFW"
## [1] "Row  6836  is has no data for pop  FLCC"
## [2] "Row  6836  is has no data for pop  FLLG"
## [1] "Row  6840  is has no data for pop  FLLG"
## [1] "Row  6894  is has no data for pop  LAFW"
## [1] "Row  6900  is has no data for pop  FLLG"
## [1] "Row  6910  is has no data for pop  FLLG"
## [1] "Row  6970  is has no data for pop  LAFW"
## [1] "Row  6997  is has no data for pop  LAFW"
## [1] "Row  7005  is has no data for pop  FLLG"
## [1] "Row  7026  is has no data for pop  FLPB"
## [2] "Row  7026  is has no data for pop  FLHB"
## [3] "Row  7026  is has no data for pop  FLCC"
## [4] "Row  7026  is has no data for pop  FLLG"
## [1] "Row  7031  is has no data for pop  FLLG"
## [1] "Row  7042  is has no data for pop  LAFW"
## [1] "Row  7071  is has no data for pop  TXFW"
## [2] "Row  7071  is has no data for pop  ALFW"
## [1] "Row  7101  is has no data for pop  LAFW"
## [1] "Row  7103  is has no data for pop  LAFW"
## [1] "Row  7111  is has no data for pop  LAFW"
## [1] "Row  7126  is has no data for pop  FLLG"
## [1] "Row  7133  is has no data for pop  LAFW"
## [1] "Row  7151  is has no data for pop  FLLG"
## [1] "Row  7203  is has no data for pop  FLLG"
## [1] "Row  7212  is has no data for pop  FLLG"
## [1] "Row  7238  is has no data for pop  LAFW"
## [1] "Row  7255  is has no data for pop  FLLG"
## [1] "Row  7259  is has no data for pop  FLLG"
## [1] "Row  7273  is has no data for pop  FLLG"
## [1] "Row  7280  is has no data for pop  FLLG"
## [1] "Row  7303  is has no data for pop  FLLG"
## [1] "Row  7320  is has no data for pop  LAFW"
## [1] "Row  7344  is has no data for pop  FLLG"
## [1] "Row  7346  is has no data for pop  LAFW"
## [1] "Row  7370  is has no data for pop  FLLG"
## [1] "Row  7371  is has no data for pop  FLLG"
## [1] "Row  7399  is has no data for pop  LAFW"
## [1] "Row  7447  is has no data for pop  FLLG"
## [1] "Row  7471  is has no data for pop  LAFW"
## [1] "Row  7542  is has no data for pop  FLLG"
## [1] "Row  7571  is has no data for pop  FLLG"
## [1] "Row  7581  is has no data for pop  LAFW"
## [1] "Row  7586  is has no data for pop  FLLG"
## [1] "Row  7617  is has no data for pop  FLLG"
## [1] "Row  7627  is has no data for pop  FLLG"
## [1] "Row  7660  is has no data for pop  FLLG"
## [1] "Row  7671  is has no data for pop  LAFW"
## [1] "Row  7683  is has no data for pop  LAFW"
## [1] "Row  7740  is has no data for pop  FLLG"
## [1] "Row  7752  is has no data for pop  FLLG"
## [1] "Row  7755  is has no data for pop  FLLG"
## [1] "Row  7785  is has no data for pop  LAFW"
## [1] "Row  7789  is has no data for pop  FLLG"
## [1] "Row  7807  is has no data for pop  LAFW"
## [1] "Row  7823  is has no data for pop  LAFW"
## [1] "Row  7855  is has no data for pop  FLLG"
## [1] "Row  7863  is has no data for pop  FLLG"
## [1] "Row  7867  is has no data for pop  FLLG"
## [1] "Row  7869  is has no data for pop  LAFW"
## [1] "Row  7885  is has no data for pop  LAFW"
## [2] "Row  7885  is has no data for pop  FLLG"
## [1] "Row  7891  is has no data for pop  FLLG"
## [1] "Row  7918  is has no data for pop  FLLG"
## [1] "Row  7937  is has no data for pop  TXCC"
## [2] "Row  7937  is has no data for pop  TXFW"
## [3] "Row  7937  is has no data for pop  TXCB"
## [4] "Row  7937  is has no data for pop  LAFW"
## [5] "Row  7937  is has no data for pop  ALFW"
## [1] "Row  7955  is has no data for pop  FLLG"
## [1] "Row  7961  is has no data for pop  TXFW"
## [1] "Row  7970  is has no data for pop  FLLG"
## [1] "Row  7981  is has no data for pop  LAFW"
## [1] "Row  8038  is has no data for pop  FLLG"
## [1] "Row  8046  is has no data for pop  FLLG"
## [1] "Row  8055  is has no data for pop  FLLG"
## [1] "Row  8057  is has no data for pop  LAFW"
## [1] "Row  8073  is has no data for pop  FLLG"
## [1] "Row  8077  is has no data for pop  FLLG"
## [1] "Row  8091  is has no data for pop  FLLG"
## [1] "Row  8106  is has no data for pop  LAFW"
## [1] "Row  8114  is has no data for pop  FLLG"
## [1] "Row  8173  is has no data for pop  FLLG"
## [1] "Row  8217  is has no data for pop  FLLG"
## [1] "Row  8222  is has no data for pop  LAFW"
## [1] "Row  8227  is has no data for pop  LAFW"
## [1] "Row  8266  is has no data for pop  LAFW"
## [2] "Row  8266  is has no data for pop  FLLG"
## [1] "Row  8269  is has no data for pop  FLLG"
## [1] "Row  8270  is has no data for pop  FLLG"
## [1] "Row  8292  is has no data for pop  TXFW"
## [2] "Row  8292  is has no data for pop  ALFW"
## [1] "Row  8319  is has no data for pop  FLLG"
## [1] "Row  8324  is has no data for pop  LAFW"
## [1] "Row  8333  is has no data for pop  FLLG"
## [1] "Row  8372  is has no data for pop  FLLG"
## [1] "Row  8399  is has no data for pop  FLLG"
## [1] "Row  8404  is has no data for pop  LAFW"
## [1] "Row  8405  is has no data for pop  FLLG"
## [1] "Row  8414  is has no data for pop  FLLG"
## [1] "Row  8416  is has no data for pop  FLHB"
## [2] "Row  8416  is has no data for pop  FLCC"
## [3] "Row  8416  is has no data for pop  FLLG"
## [1] "Row  8447  is has no data for pop  FLLG"
## [1] "Row  8454  is has no data for pop  FLLG"
## [1] "Row  8455  is has no data for pop  LAFW"
## [1] "Row  8458  is has no data for pop  FLLG"
## [1] "Row  8481  is has no data for pop  FLLG"
## [1] "Row  8498  is has no data for pop  FLLG"
## [1] "Row  8568  is has no data for pop  LAFW"
## [2] "Row  8568  is has no data for pop  FLLG"
## [1] "Row  8572  is has no data for pop  LAFW"
## [1] "Row  8585  is has no data for pop  LAFW"
## [1] "Row  8598  is has no data for pop  LAFW"
## [1] "Row  8601  is has no data for pop  LAFW"
## [1] "Row  8625  is has no data for pop  FLLG"
## [1] "Row  8630  is has no data for pop  LAFW"
## [1] "Row  8644  is has no data for pop  FLLG"
## [1] "Row  8657  is has no data for pop  FLLG"
## [1] "Row  8783  is has no data for pop  FLLG"
## [1] "Row  8798  is has no data for pop  LAFW"
## [1] "Row  8854  is has no data for pop  LAFW"
## [1] "Row  8860  is has no data for pop  LAFW"
## [1] "Row  8866  is has no data for pop  FLLG"
## [1] "Row  8872  is has no data for pop  LAFW"
## [2] "Row  8872  is has no data for pop  FLLG"
## [1] "Row  8886  is has no data for pop  FLLG"
## [1] "Row  8891  is has no data for pop  LAFW"
## [1] "Row  8892  is has no data for pop  LAFW"
## [2] "Row  8892  is has no data for pop  FLLG"
## [1] "Row  8928  is has no data for pop  FLLG"
## [1] "Row  8952  is has no data for pop  FLLG"
## [1] "Row  8975  is has no data for pop  FLLG"
## [1] "Row  8979  is has no data for pop  LAFW"
## [1] "Row  9014  is has no data for pop  LAFW"
## [1] "Row  9061  is has no data for pop  LAFW"
## [1] "Row  9070  is has no data for pop  FLLG"
## [1] "Row  9076  is has no data for pop  LAFW"
## [2] "Row  9076  is has no data for pop  ALFW"
## [1] "Row  9077  is has no data for pop  TXFW"
## [1] "Row  9093  is has no data for pop  LAFW"
## [1] "Row  9104  is has no data for pop  LAFW"
## [2] "Row  9104  is has no data for pop  FLLG"
## [1] "Row  9133  is has no data for pop  LAFW"
## [1] "Row  9163  is has no data for pop  FLLG"
## [1] "Row  9191  is has no data for pop  FLLG"
## [1] "Row  9222  is has no data for pop  FLLG"
## [1] "Row  9281  is has no data for pop  ALFW"
## [1] "Row  9312  is has no data for pop  FLLG"
## [1] "Row  9352  is has no data for pop  LAFW"
## [1] "Row  9410  is has no data for pop  FLLG"
## [1] "Row  9429  is has no data for pop  LAFW"
## [2] "Row  9429  is has no data for pop  FLLG"
## [1] "Row  9472  is has no data for pop  ALFW"
## [1] "Row  9496  is has no data for pop  LAFW"
## [1] "Row  9507  is has no data for pop  LAFW"
## [1] "Row  9555  is has no data for pop  FLLG"
## [1] "Row  9556  is has no data for pop  FLLG"
## [1] "Row  9563  is has no data for pop  FLLG"
## [1] "Row  9616  is has no data for pop  TXCB"
## [1] "Row  9633  is has no data for pop  LAFW"
## [2] "Row  9633  is has no data for pop  FLLG"
for(i in 1:10){
  rowsub<-sample(nrow(vcf),1000,replace = FALSE)
  gpopsub<-vcf2gpop(vcf[rowsub,colnames(vcf)!="SNP"],pop.list,paste(sub.prefix,i,".genepop",sep=""))
}
gpop<-vcf2gpop(vcf[,colnames(vcf)!="SNP"],pop.list,gpop.name)
#then run poptree on all of them

And then run poptree. Poptree ran on the full dataset as well as the subsets of 1000 SNPs. Did they all provide similar results? What does the consensus tree look like?

poptree.dir<-"poptree/"
poptree.prefix<-"p4"
poptree.png<-"p4.poptree.png"
poptree.files<-list.files(path = poptree.dir,pattern=paste(poptree.prefix,".*.nwk",sep=""))
poptree.files<-lapply(poptree.files,function(x){ paste("poptree",x,sep="/")})
poptrees<-lapply(poptree.files,read.tree)
con.poptree<-consensus(poptrees)
con.poptree$tip.label[con.poptree$tip.label=="FLLG"]<-"FLFW"

clcolr <- rep("black", dim(con.poptree$edge)[1])
#clcolr[c(12,13,14,24)]<-all.colors[3]
#png(paste(poptree.dir,poptree.prefix,".consensus.png",sep=""),height=7,width=7,units="in",res=300)
#dev.off()
png(paste(poptree.dir,poptree.prefix,".png",sep=""),height=10,width=10,units="in",res=300)
par(mfrow=c(3,4),oma=c(1,1,1,1),mar=c(1,1,1,1))
for(i in 1:length(poptrees)){
  plot.phylo(poptrees[[i]],cex=1.5)
  mtext(poptree.files[i],3)
}
plot.phylo(con.poptree,tip.color = c(rep(grp.colors[6],4),grp.colors[5],
                                     rep(grp.colors[1],4),rep(grp.colors[2],3),
                                     rep(grp.colors[3],4)),
           edge.color = clcolr,edge.width = 2,cex=1,font=1)
mtext("Consensus")
dev.off()
## png 
##   2
All Trees

All Trees

Based on this, I’m going to move forward just with the full dataset (which includes bootstrap values).

#just the full subset tree
pt.subtree<-poptrees[[1]]
pt.subtree$tip.label[pt.subtree$tip.label=="FLLG"]<-"FLFW"
pt.colors<-pt.subtree$tip.label
pt.colors[pt.colors %in% "FLFW"]<-grp.colors[6]
pt.colors[pt.colors %in% c("FLPB","FLHB","FLCC")]<-grp.colors[6]
pt.colors[pt.colors %in% c("FLAB")]<-grp.colors[5]
pt.colors[pt.colors %in% c("FLSI","FLFD","FLKB","FLSG")]<-grp.colors[3]
pt.colors[pt.colors %in% c("ALST","ALFW","LAFW")]<-grp.colors[2]
pt.colors[pt.colors %in% c("TXSP","TXCC","TXFW","TXCB")]<-grp.colors[1]

clcolr <- rep("black", dim(pt.subtree$edge)[1])
clcolr[c(6,19,20,21,22)]<-all.colors[3]
png(poptree.png,height=7,width=7,units="in",res=300)
plot.phylo(pt.subtree,tip.color = pt.colors,
           edge.color = clcolr,edge.width = 2,label.offset = 0.0015)
dev.off()
## png 
##   2
PopTree

PopTree

Make Fig 3.

To get the Poptree distance matrix, I copied and pasted the distance matrix in the p4.out file into a new file and saved it as p4.distance.out. I had to first save each part of the matrix as text files, open them in Excel to standardize the spacings, merge them, and then save them.

pt.dist<-as.matrix(read.table("poptree/p4.distance.out",header=T,row.names=1,sep='\t'))
jostpw<-as.matrix(read.table("Subset.JostsD.tsv",header=T,sep='\t'))
pwise.fst.raw<-read.table("stacks/fwsw_fst_summary_subset.txt",header=T,row.names=1,sep='\t')#p4_fst_summary.txt
pwise.fst<-matrix(nrow=length(pop.list),ncol=length(pop.list))
dimnames(pwise.fst)[[1]]<-dimnames(pwise.fst)[[2]]<-pop.list
for(pop1 in 1:length(pop.list)){
  for(pop2 in 1:length(pop.list)){
    if(pop1 != pop2){
      raws<-c(pwise.fst.raw[pop.list[pop1],pop.list[pop2]],pwise.fst.raw[pop.list[pop2],pop.list[pop1]])
      pwise.fst[pop.list[pop1],pop.list[pop2]]<-raws[!is.na(raws)]
    }
  }
}
dimnames(pwise.fst)[[1]]<-dimnames(pwise.fst)[[2]]<-pop.labs
pwise.fst[lower.tri(pwise.fst)]<-NA
heatmaps.name<-"p4.heatmap.png"
colors<-c("blue","yellow","red")
pal<-colorRampPalette(colors)
ncol=80
cols<-pal(ncol)
cov<-read.table(gzfile(treemix.file), as.is = T, head = T, quote = "", comment.char = "")
#reorder
covplot <- data.frame(matrix(nrow = nrow(cov), ncol = ncol(cov)))
for(i in 1:length(pop.list)){
  for( j in 1:length(pop.list)){
    
    covplot[i, j] = cov[which(names(cov)==pop.list[i]), which(names(cov)==pop.list[j])]
    rownames(covplot)[i]<-pop.list[i]
    colnames(covplot)[j]<-pop.list[j]
  }
}
cp<-as.matrix(covplot)
cp[lower.tri(cp)]<-NA
cp[upper.tri(cp)]<-covplot[upper.tri(covplot)]
colnames(cp)<-pop.labs
rownames(cp)<-pop.labs
cp.lv<-levelplot(cp,col.regions=cols,alpha.regions=0.7,
          scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
dimnames(pt.dist)[[1]]<-dimnames(pt.dist)[[2]]<-pop.labs
dimnames(jostpw)[[1]]<-dimnames(jostpw)[[2]]<-pop.labs
colors<-c("black","darkgrey","grey","lightgrey","cornflowerblue")
pal<-colorRampPalette(colors)
ncol=80
cols<-pal(ncol)
rev.colors<-c("cornflowerblue","lightgrey","grey","darkgrey","black")
rev.pal<-colorRampPalette(rev.colors)
rev.cols<-rev.pal(ncol)

hm.height<-list(x=3.8,units="in")#2.2
hm.width<-list(x=3.9,units="in")#2.4 in RStudio
png(heatmaps.name,height=11,width=11,units="in",res=300)
fst.lv<-levelplot(as.matrix(pwise.fst),col.regions=cols,alpha.regions=0.7,
                  scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(fst.lv,split=c(1,1,2,2),more=TRUE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text(expression(italic(F)[ST]), 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

cp.lv<-levelplot(cp,col.regions=rev.cols,alpha.regions=0.7,
                 scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(cp.lv,split=c(1,2,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text("covariance", 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

jost.lv<-levelplot(jostpw,col.regions=cols,alpha.regions=0.7,
                   scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(jost.lv,split=c(2,1,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text(expression("Jost's"~italic(D)), 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

ptdist.lv<-levelplot(pt.dist,col.regions=cols,alpha.regions=0.7,
                     scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(ptdist.lv,split=c(2,2,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text("PopTree2\nDistance", 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

dev.off()
Figure 3. Heatmap

Figure 3. Heatmap

Figure 4

Figure 4 is the Poptree2 and Treemix trees next to each other

plotName<-"p4.trees.png"
tm.tree<-"treemix/p4_k100bFLPBrm4"
pt.subtree<-read.tree("poptree/p4.nwk")
rect.start<-0.1
clcolr <- rep("grey27", dim(pt.subtree$edge)[1])
clcolr[c(6,19,20,21,22)]<-all.colors[3]
pt.subtree$node.label<-round(as.numeric(pt.subtree$node.label)*100)
pt.subtree$tip.label[pt.subtree$tip.label=="FLLG"]<-"FLFW"
png(plotName,height=5,width=10,units="in",res=300)
par(mfrow=c(1,2),oma=c(1,0,1,0),mar=c(1,1,1,0.1))
plot.phylo(pt.subtree,tip.color = pt.colors,#align.tip.label = T,#show.node.label = TRUE,
           edge.color = clcolr,edge.width = 2,label.offset = 0.0015,font=1)
nodelabels(pt.subtree$node.label,cex=0.75,font=2,frame="none",adj=c(1,-0.2))
mtext("PopTree2",3)
t3<-plot_tree(tm.tree,"treemix/poporder",plus=0.05,scale=F,mbar=F,arrow=0.1,
              tip.order = tip.names,lwd = 2,branch.cols = branch.cols,xlab=F)
##     V1   V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 FLSG NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 2    2 <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  16   8
## 3    3 TXCC NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 4    4 FLFD NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 5   15 FLPB NOT_ROOT NOT_MIG     TIP 473  NA NA  NA  NA
## 6   16 <NA> NOT_ROOT NOT_MIG NOT_TIP   2   1  1 304   7
## 7   31 FLLG NOT_ROOT NOT_MIG     TIP 412  NA NA  NA  NA
## 8   32 <NA> NOT_ROOT NOT_MIG NOT_TIP 473  52 12 212   3
## 9   51 FLKB NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 10  52 <NA> NOT_ROOT NOT_MIG NOT_TIP  32   2  9 256   3
## 11  75 TXFW NOT_ROOT NOT_MIG     TIP 104  NA NA  NA  NA
## 12  76 <NA> NOT_ROOT NOT_MIG NOT_TIP 304 472  2 303   1
## 13 103 ALFW NOT_ROOT NOT_MIG     TIP 472  NA NA  NA  NA
## 14 104 <NA> NOT_ROOT NOT_MIG NOT_TIP 304  75  1 356   3
## 15 135 FLAB NOT_ROOT NOT_MIG     TIP 588  NA NA  NA  NA
## 16 136 <NA> NOT_ROOT     MIG NOT_TIP  32  52 NA  NA  NA
## 17 171 TXSP NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172 <NA> NOT_ROOT NOT_MIG NOT_TIP 356   3  1 171   1
## 19 211 FLHB NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212 <NA> NOT_ROOT NOT_MIG NOT_TIP  32 211  1 412   2
## 21 255 FLSI NOT_ROOT NOT_MIG     TIP 588  NA NA  NA  NA
## 22 256 <NA> NOT_ROOT NOT_MIG NOT_TIP  52   4  1 588   2
## 23 303 ALST NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 24 304 <NA> NOT_ROOT NOT_MIG NOT_TIP  16 104  4  76   3
## 25 355 TXCB NOT_ROOT NOT_MIG     TIP 356  NA NA  NA  NA
## 26 356 <NA> NOT_ROOT NOT_MIG NOT_TIP 104 172  2 355   1
## 27 411 FLCC NOT_ROOT NOT_MIG     TIP 412  NA NA  NA  NA
## 28 412 <NA> NOT_ROOT NOT_MIG NOT_TIP 212 411  1  31   1
## 29 471 LAFW NOT_ROOT NOT_MIG     TIP 472  NA NA  NA  NA
## 30 472 <NA> NOT_ROOT NOT_MIG NOT_TIP  76 471  1 103   1
## 31 473 <NA>     ROOT NOT_MIG NOT_TIP 473  15  1  32  15
## 32 490 <NA> NOT_ROOT     MIG NOT_TIP 104  75 NA  NA  NA
## 33 546 <NA> NOT_ROOT     MIG NOT_TIP 412  31 NA  NA  NA
## 34 588 <NA> NOT_ROOT NOT_MIG NOT_TIP 256 255  1 135   1
## 35 641 <NA> NOT_ROOT     MIG NOT_TIP 104  75 NA  NA  NA
##                                                                                                                                                                                                                                                                                                                                                                                                                              V11
## 1                                                                                                                                                                                                                                                                                                                                                                                                               FLSG:0.000248367
## 2                                                                                                                                                                                 (FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358
## 3                                                                                                                                                                                                                                                                                                                                                                                                               TXCC:0.000281411
## 4                                                                                                                                                                                                                                                                                                                                                                                                               FLFD:0.000926406
## 5                                                                                                                                                                                                                                                                                                                                                                                                                         FLPB:0
## 6                                                                                                                                                                                                               (FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825
## 7                                                                                                                                                                                                                                                                                                                                                                                                                 FLLG:0.0510187
## 8            (((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382,(FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155):0
## 9                                                                                                                                                                                                                                                                                                                                                                                                               FLKB:0.000474655
## 10                                                                                          ((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382
## 11                                                                                                                                                                                                                                                                                                                                                                                                                TXFW:0.0269586
## 12                                                                                                                                                                                                                                                                                                                                                                ((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786
## 13                                                                                                                                                                                                                                                                                                                                                                                                               ALFW:0.00569026
## 14                                                                                                                                                                                                                                                                                                                        (TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066
## 15                                                                                                                                                                                                                                                                                                                                                                                                                FLAB:0.0146583
## 16                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
## 17                                                                                                                                                                                                                                                                                                                                                                                                               TXSP:0.00121161
## 18                                                                                                                                                                                                                                                                                                                                                                                 (TXCC:0.000281411,TXSP:0.00121161):0.00205677
## 19                                                                                                                                                                                                                                                                                                                                                                                                              FLHB:0.000682617
## 20                                                                                                                                                                                                                                                                                                                                                    (FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155
## 21                                                                                                                                                                                                                                                                                                                                                                                                               FLSI:8.2722e-05
## 22                                                                                                                                                                                                                                                                                                                                                     (FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607
## 23                                                                                                                                                                                                                                                                                                                                                                                                                        ALST:0
## 24                                                                                                                                                                                                                                             ((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792
## 25                                                                                                                                                                                                                                                                                                                                                                                                               TXCB:0.00319305
## 26                                                                                                                                                                                                                                                                                                                                                    ((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619
## 27                                                                                                                                                                                                                                                                                                                                                                                                               FLCC:0.00110869
## 28                                                                                                                                                                                                                                                                                                                                                                                  (FLCC:0.00110869,FLLG:0.0510187):0.000504332
## 29                                                                                                                                                                                                                                                                                                                                                                                                                LAFW:0.0118029
## 30                                                                                                                                                                                                                                                                                                                                                                                    (LAFW:0.0118029,ALFW:0.00569026):0.0176978
## 31 (FLPB:0,(((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382,(FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155):0);
## 32                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
## 33                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
## 34                                                                                                                                                                                                                                                                                                                                                                                   (FLSI:8.2722e-05,FLAB:0.0146583):0.00107509
## 35                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
##              x       y   ymin   ymax
## 1  0.022525012 0.84375 0.8125 0.8750
## 2  0.021611820 0.87500 0.3750 0.9375
## 3  0.046415117 0.71875 0.6875 0.7500
## 4  0.022500716 0.34375 0.3125 0.3750
## 5  0.000000000 0.96875 0.9375 1.0000
## 6  0.022276645 0.81250 0.3750 0.8750
## 7  0.052594582 0.03125 0.0000 0.0625
## 8  0.000000000 0.18750 0.0000 0.9375
## 9  0.022086475 0.90625 0.8750 0.9375
## 10 0.020338240 0.37500 0.1875 0.9375
## 11 0.067069276 0.78125 0.7500 0.8125
## 12 0.037667946 0.43750 0.3750 0.5625
## 13 0.053147151 0.46875 0.4375 0.5000
## 14 0.040110746 0.75000 0.5625 0.8125
## 15 0.029966867 0.21875 0.1875 0.2500
## 16 0.011480800      NA 0.0000 0.1875
## 17 0.047345316 0.65625 0.6250 0.6875
## 18 0.046133706 0.68750 0.6250 0.7500
## 19 0.001754167 0.15625 0.1250 0.1875
## 20 0.001071550 0.12500 0.0000 0.1875
## 21 0.022732122 0.28125 0.2500 0.3125
## 22 0.021574310 0.31250 0.1875 0.3750
## 23 0.037667946 0.40625 0.3750 0.4375
## 24 0.032640086 0.56250 0.3750 0.8125
## 25 0.046647245 0.59375 0.5625 0.6250
## 26 0.044076936 0.62500 0.5625 0.7500
## 27 0.002684572 0.09375 0.0625 0.1250
## 28 0.001575882 0.06250 0.0000 0.1250
## 29 0.059259791 0.53125 0.5000 0.5625
## 30 0.047456891 0.50000 0.4375 0.5625
## 31 0.000000000 0.93750 0.0000 1.0000
## 32 0.064357446      NA 0.5625 0.7500
## 33 0.022581282      NA 0.0000 0.0625
## 34 0.022649400 0.25000 0.1875 0.3125
## 35 0.067069276      NA 0.5625 0.7500
## [1] "0.564492 0 0.02033824"
## [1] "0.899407 0.0401107462921379 0.0670692762921379"
## [1] "0.41172 0.001575882 0.052594582"
## [1] "0.899407 0.0401107462921379 0.0670692762921379"
##  [1] 0.022525012 0.046415117 0.022500716 0.000000000 0.052594582
##  [6] 0.022086475 0.067069276 0.053147151 0.029966867 0.047345316
## [11] 0.001754167 0.022732122 0.037667946 0.046647245 0.002684572
## [16] 0.059259791
## [1] 0.003
mtext("Treemix",3)
ybar<-0.01
mcols = rev( heat.colors(150) )
mcols = mcols[50:length(mcols)]
ymi = ybar+rect.start
yma = ybar+0.3
l = 0.2
w = l/100
xma = max(t3$d$x/20)
rect( rep(rect.start, 100), ymi+(0:99)*w, rep(rect.start+xma, 100), ymi+(1:100)*w, col = mcols, border = mcols)
text(rect.start+xma+0.001, ymi, lab = "0", adj = 0)
text(rect.start+xma+0.001, yma, lab = "0.5", adj = 0)
text(rect.start, yma+0.07, lab = "Migration", adj = 0 )
text(rect.start, yma+0.04, lab = "weight", adj = 0 )
dev.off()
## png 
##   2
Figure 4. Trees

Figure 4. Trees

Figure 5

I need to re-run populations because I don’t seem to have the correct fst files. I can use the whitelist and hopefully include bootstrapping and kernel smoothing:

populations -b 2 -W fwsw_results/subset.whitelist.txt -P fwsw_results/stacks -M fwsw_pops_map.txt --fstats --vcf_haplotypes --genomic --bootstrap -k

And then I will identify the shared outliers among them. But first, let’s define some names.

vcf<-parse.vcf("p4.upd.vcf")
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
scaffs[1:22]<-lgs
scaff.starts<-tapply(vcf$POS,vcf$`#CHROM`,max)
scaff.starts<-data.frame(rbind(cbind(names(scaff.starts),scaff.starts)),stringsAsFactors = F)
locus.info<-c(colnames(vcf[1:9]),"SNP")
dd.plot.name<-as.character("separate_delta-divergence.png")
dd.name<-as.character("sep.deltadivergence.txt")
sdd.name<-as.character("sep.smoothedDD.out.txt")
afs.plot.name<-as.character("p4.All_AFS.png")
stacks.sig.out<-"p4.stacks.sig.snps.txt"
annotations.name<-"p4.StacksFWSWOutliers_annotatedByGenome.csv"
bf.file<-"bayenv/p4.bf.txt"
fwsw.tx<-read.delim("stacks/p4.fst_TXCC-TXFW.txt")
fwsw.la<-read.delim("stacks/p4.fst_ALST-LAFW.txt")
fwsw.al<-read.delim("stacks/p4.fst_ALFW-ALST.txt")
fwsw.fl<-read.delim("stacks/p4.fst_FLCC-FLLG.txt")
fwsw<-read.delim("stacks/fw-sw_populations/batch_2.fst_marine-freshwater.tsv")
#Compare neighboring pops.

tx.sig<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01,"Locus.ID"]
la.sig<-fwsw.la[fwsw.la$Fisher.s.P<0.01,"Locus.ID"]
al.sig<-fwsw.al[fwsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sig<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
## [1] 962
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
## [1] 579
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
## [1] 679
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
##           LG1          LG10          LG12          LG13          LG14 
##             5             1             5             1             6 
##          LG18          LG19           LG2          LG20           LG3 
##             2             1             4             3             1 
##           LG4           LG6           LG9 scaffold_1679  scaffold_246 
##            14             3             1             1             1 
##  scaffold_409  scaffold_665  scaffold_727  scaffold_856 
##             1             1             1             1
#are they using the same SNPs or different SNPs?
stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
for(i in 1:nrow(fw.shared.chr)){
  tx.bp<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01 & fwsw.tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  la.bp<-fwsw.la[fwsw.la$Fisher.s.P<0.01 & fwsw.la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  al.bp<-fwsw.al[fwsw.al$Fisher.s.P<0.01 & fwsw.al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  fl.bp<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01 & fwsw.fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
  stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
  stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
  stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
}
colnames(stacks.sig)<-c("TX","LA","AL","FL")
stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
write.table(stacks.sig,stacks.sig.out,sep='\t',row.names=FALSE,col.names=TRUE)
#+ annotations, eval=FALSE
gff<-read.delim(gzfile("../../scovelli_genome/ssc_2016_12_20_chromlevel.gff.gz"),header=F)
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
genome.blast<-read.csv("../../scovelli_genome/ssc_2016_12_20_cds_nr_blast_results.csv",skip=1,header=T)#I saved it as a csv

#' extract the significant regions from the gff file
fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
  this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
  this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
  if(nrow(this.reg) == 0){
    if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
      new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                      region="beyond.last.contig", description=NA,SSCID=NA)
    }else{
      new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                      region=NA,description=NA,SSCID=NA)
    }
  }else{
    if(length(grep("SSCG\\d+",this.reg$attribute))>0){
      geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
      gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
    }else{
      geneID<-NA
      gene<-NA
    }
    new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                    region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
  }
  return(as.data.frame(new))
}))
## Warning in data.frame(Locus = sig["Locus.ID"], Chr = sig["Chr"], BP =
## sig["BP"], : row names were found from a short variable and have been
## discarded
write.csv(fw.sig.reg,annotations.name)
fw.sig.reg<-read.csv(annotations.name)

There are 53 shared SNPs using the Fisher’s P as a cutoff.

The bayes factors and Fsts are all in the following chunk.

fwsw<-read.delim("stacks/fw-sw_populations/batch_2.fst_marine-freshwater.tsv")
#Compare neighboring pops.

tx.sig<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01,"Locus.ID"]
la.sig<-fwsw.la[fwsw.la$Fisher.s.P<0.01,"Locus.ID"]
al.sig<-fwsw.al[fwsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sig<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
## [1] 962
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
## [1] 579
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
## [1] 679
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
bf<-read.delim(bf.file)
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,9,6)]
stacks.sig<-read.delim(stacks.sig.out)

Figure 5 includes Fst data, Bayes Factors data, colors from Structure, and reciprocal monophyly data, plus smoothed Fst values (which I think I will leave out this round).

Plot Fig. 5

Now I’m ready to plot Figure 5.

fig5.name<-"p4_stacks_fsts_fwsw_bf.png"
addLines<-FALSE
addSmooth<-TRUE
#' Set up the plotting utilities
all.chr<-data.frame(Chr=c(as.character(fwsw.tx$Chr),as.character(fwsw.la$Chr),
                          as.character(fwsw.al$Chr),as.character(fwsw.fl$Chr),
                          as.character(bf$scaffold)),
  BP=c(fwsw.tx$BP,fwsw.la$BP,fwsw.al$BP,fwsw.fl$BP,bf$BP),stringsAsFactors = F)
bounds<-tapply(as.numeric(as.character(all.chr$BP)), all.chr$Chr,max)
bounds<-data.frame(Chrom=dimnames(bounds),End=bounds)
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chr]
plot.scaffs[1:22]<-lgs
bounds<-bounds[match(plot.scaffs,bounds$Chrom),]
#generate info

fwsw.plot<-assign.plotpos(fwsw,plot.scaffs,bounds,df.bp="BP",df.chrom = "Chr")
addLines<-TRUE
addLines<-FALSE
addSmooth<-TRUE
#' plot with the outlier regions
#' Does NOT include monophyletic neighborjoining trees.
png(fig5.name,height=6,width=8,units="in",res=300)
par(mfrow=c(5,1),mar=c(0.85,2,0,0.5),oma=c(1,1,1,0.5))
#par(fig=c(0,1,0.9-0.9/5,0.9))

fwswt.fst<-fst.plot(fwsw.tx,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols = c(grp.colors[1],grp.colors[2]),pt.cex=1,axis.size = 1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswt.fst$plot.pos,fwswt.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswt.fst$BP,fwswt.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[1])
#points(fwswt.fst$plot.pos[fwswt.fst$Locus.ID %in% all.shared],fwswt.fst$Corrected.AMOVA.Fst[fwswt.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswt.fst$plot.pos),0,1)
abline(v=fwswt.fst$plot.pos[fwswt.fst$Locus.ID %in% all.shared],col="gray47")
mtext("TXFW vs. TXCC",2,cex=0.75)#,line=-1)
labs<-tapply(fwswt.fst$plot.pos,fwswt.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)

fwswl.fst<-fst.plot(fwsw.la,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols=c(grp.colors[2],grp.colors[3]),pt.cex=1,axis.size=1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswl.fst$plot.pos,fwswl.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswl.fst$BP,fwswl.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[3])
#points(fwswl.fst$plot.pos[fwswl.fst$Locus.ID %in% all.shared],fwswl.fst$Corrected.AMOVA.Fst[fwswl.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswl.fst$plot.pos),0,1)
abline(v=fwswl.fst$plot.pos[fwswl.fst$Locus.ID %in% all.shared],col="gray47")
mtext("LAFW vs. ALST",2,cex=0.75)#,line=-1)
labs<-tapply(fwswl.fst$plot.pos,fwswl.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)

fwswa.fst<-fst.plot(fwsw.al,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols=c(grp.colors[3],grp.colors[2]),pt.cex=1,axis.size = 1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswa.fst$plot.pos,fwswa.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswa.fst$BP,fwswa.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[4])
#points(fwswa.fst$plot.pos[fwswa.fst$Locus.ID %in% all.shared],fwswa.fst$Corrected.AMOVA.Fst[fwswa.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswa.fst$plot.pos),0,1)
abline(v=fwswa.fst$plot.pos[fwswa.fst$Locus.ID %in% all.shared],col="gray47")
mtext("ALFW vs. ALST",2,cex=0.75)#,line=-1)
labs<-tapply(fwswa.fst$plot.pos,fwswa.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)

fwswf.fst<-fst.plot(fwsw.fl,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols=c(grp.colors[6],grp.colors[5]),pt.cex=1,axis.size=1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswf.fst$plot.pos,fwswf.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswf.fst$BP,fwswf.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[6])
#points(fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],fwswf.fst$Corrected.AMOVA.Fst[fwswf.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswf.fst$plot.pos),0,1)
abline(v=fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],col="gray47")
mtext("FLFW vs. FLCC",2,cex=0.75)#,line=-1)
mtext(expression(bold(italic(F)[ST])),2,outer=T,line=-1,cex=0.75)
labs<-tapply(fwswf.fst$plot.pos,fwswf.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)
#BF
bs.sal<-fst.plot(bf,fst.name="logSal",chrom.name="scaffold",bp.name = "BP",scaffold.widths=bounds,
                 scaffs.to.plot = plot.scaffs,pch=19,axis.size = 1,pt.cex = 1)
points(bs.sal[bs.sal$SNP %in% sal.bf.sig$SNP,"plot.pos"],
       bs.sal[bs.sal$SNP %in% sal.bf.sig$SNP,"logSal"],
       col="cornflowerblue",pch=19)#give the outliers a color
clip(0,max(bs.sal$plot.pos),-3,241)
abline(v=fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],col="gray47")
mtext("log(Salinity BF)",2,cex=0.75,line=2.1)
#points(bs.sal[bs.sal$SNP %in% stacks.sig$SNP,"plot.pos"],
#       bs.sal[bs.sal$SNP %in% stacks.sig$SNP,"logSal"],
#       col="cornflowerblue",cex=1.3)
#clip(min(bs.sal$plot.pos),max(bs.sal$plot.pos),
#     min(bs.sal$logSal),max(bs.sal$logSal))
#abline(h=log(bf.co["Salinity_BF"]),col="cornflowerblue",lwd=2)

#add chromosome labels
labs<-tapply(bs.sal$plot.pos,bs.sal$scaffold,median)
text(x=labs[lgs],y=-25,labels=lgn,xpd=TRUE)
dev.off()
## png 
##   2
Figure 5. Fsts

Figure 5. Fsts

There seems to be an overabundance of shared outliers on LG4. I can test this using the multinomial distribution.

if(!("stacks.sig" %in% ls())){
  stacks.sig<-read.delim("p4.stacks.sig.snps.txt")
} 
if(is.null(stacks.sig$SNP)){ #make sure the SNP column is there.
  stacks.sig$SNP<-paste(stacks.sig$Chr,as.numeric(stacks.sig$BP)+1,sep=".")
}
sig.chrom<-stacks.sig[stacks.sig$Chr %in% lgs,]
sig.chrom$Chr<-factor(sig.chrom$Chr)
nloci<-nrow(sig.chrom)
nchrom<-length(lgs)
exp.perchrom<-rep(nloci/length(lgs),length(lgs))
names(exp.perchrom)<-lgs
exp.perchrom
##      LG1      LG2      LG3      LG4      LG5      LG6      LG7      LG8 
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 
##      LG9     LG10     LG11     LG12     LG13     LG14     LG15     LG16 
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 
##     LG17     LG18     LG19     LG20     LG21     LG22 
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364
obs.perchrom<-tapply(sig.chrom$Locus.ID,sig.chrom$Chr,length)[lgs]
names(obs.perchrom)<-lgs
obs.perchrom[is.na(obs.perchrom)]<-0
obs.perchrom
##  LG1  LG2  LG3  LG4  LG5  LG6  LG7  LG8  LG9 LG10 LG11 LG12 LG13 LG14 LG15 
##    5    4    1   14    0    3    0    0    1    1    0    5    1    6    0 
## LG16 LG17 LG18 LG19 LG20 LG21 LG22 
##    0    0    2    1    3    0    0
dmultinom(x = obs.perchrom,prob = exp.perchrom)
## [1] 1.333996e-25

I can also look for overlapping loci in pairwise saltwater-saltwater comparisons.

swsw.name<-"p4_stacks_swsw.png"
swsw.tx<-read.delim("stacks/p4.fst_TXCB-TXCC.txt")
swsw.al<-read.delim("stacks/p4.fst_ALST-FLSG.txt")
swsw.fl<-read.delim("stacks/p4.fst_FLCC-FLHB.txt")
###### SW-SW neighbors ######

tx.sw.sig<-swsw.tx[swsw.tx$Fisher.s.P<0.01,"Locus.ID"]
al.sw.sig<-swsw.al[swsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sw.sig<-swsw.fl[swsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sw.sig[(tx.sw.sig %in% c(al.sw.sig,fl.sw.sig))])
## [1] 106
length(al.sw.sig[(al.sw.sig %in% c(tx.sw.sig,fl.sw.sig))])
## [1] 135
length(fl.sw.sig[(fl.sw.sig %in% c(tx.sw.sig,al.sw.sig))])
## [1] 35
sw.shared<-fl.sw.sig[fl.sw.sig %in% al.sw.sig & fl.sw.sig %in% tx.sw.sig]

png(swsw.name,height=6,width=7.5,units="in",res=300)
par(mfrow=c(3,1),mar=c(0.85,2,0,0.5),oma=c(1,1,1,0.5))
swswt.fst<-fst.plot(swsw.tx,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,axis.size = 1,
                    pt.cols=c(grp.colors[1],grp.colors[2]),pt.cex = 1,pch=19)
mtext("TXCC vs. TXCB",3,line=-1,cex=0.75)
swswa.fst<-fst.plot(swsw.al,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", y.lim=c(0,1),
                    scaffs.to.plot=plot.scaffs, scaffold.widths = bounds,axis.size = 1,
                    pt.cols=c(grp.colors[2],grp.colors[3]),pt.cex = 1,pch=19)
#points(swswa.fst$plot.pos,swswa.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[3])
mtext("ALST vs. FLSG",3,line=-1,cex=0.75)
swswf.fst<-fst.plot(swsw.fl,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,axis.size = 1,
                    pt.cols=c(grp.colors[6],grp.colors[5]),pt.cex = 1,pch=19)
#points(swswf.fst$plot.pos,swswf.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[6])
mtext("FLCC vs. FLHB",3,line=-1,cex=0.75)
mtext(expression(italic(F)[ST]),2,outer=T,line=-0.75,cex=0.75)
last<-0
for(i in 1:length(lgs)){
  text(x=mean(swswf.fst[swswf.fst$Chr ==lgs[i],"plot.pos"]),y=-0.07,
       labels=lgn[i], adj=1, xpd=TRUE)
  last<-max(swswf.fst[swswf.fst$Chr ==lgs[i],"plot.pos"])
}
dev.off()
## png 
##   2
SWSW Fsts

SWSW Fsts

Diversity statistics

In Figure 6, we show different measures of diversity: pi (nucleotide diversity), heterozygosity, Jost’s D, and delta-divergence. Additionally, we include the shared Fst outliers and putative loci, and highlight regions with high pi and (H).

Allele Frequency Spectrum

#' Calculate AFS from vcf
fw.afs<-lapply(fw.list,function(pop){
  this.vcf<-cbind(vcf[,locus.info],vcf[,grep(pop,colnames(vcf))])
  this.afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
})
names(fw.afs)<-c("TXFW","LAFW","ALFW","FLFW")
sw.afs<-lapply(sw.list,function(pop){
  this.vcf<-cbind(vcf[,locus.info],vcf[,grep(pop,colnames(vcf))])
  this.afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
})
names(sw.afs)<-sw.list
all.afs<-c(fw.afs,sw.afs)
Figure S1. Fsts

Figure S1. Fsts

Pi

pi.file.name<-"p4.pi.txt"
avgpi.file.name<-"p4.avgpi.txt"
all.pi<-data.frame(Chrom=vcf$`#CHROM`,Pos=vcf$POS,Pi=unlist(apply(vcf,1,calc.pi)))
all.pi$SNP<-paste(all.pi$Chrom,as.numeric(as.character(all.pi$Pos)),sep=".")
write.table(all.pi,pi.file.name,col.names = TRUE,row.names=FALSE, quote=FALSE,sep='\t')
all.pi<-read.table(pi.file.name,header=T)
avg.pi.adj<-read.table(avgpi.file.name,header=T)

Heterozygosity

all.het.name<-"p4.avg.het.txt"
avg.het.adj.name<-"p4.avg.het.adj.txt"
#het
avg.het<-do.call("rbind",sliding.window(vcf,lgs,stat="het",width=10))
avg.het.adj<-fst.plot(avg.het,scaffold.widths=scaff.starts,pch=19,
                     fst.name = "Avg.Stat",chrom.name = "Chr",bp.name = "Avg.Pos")
all.het<-data.frame(Chrom=vcf$`#CHROM`,Pos=vcf$POS,Het=unlist(apply(vcf,1,calc.het)))
all.het$SNP<-paste(all.het$Chrom,as.numeric(as.character(all.het$Pos)),sep=".")
write.table(avg.het.adj,avg.het.adj.name,sep='\t',col.names=TRUE)
write.table(all.het,all.het.name,sep='\t',col.names=TRUE)
avg.het.adj<-read.delim(avg.het.adj.name,header=TRUE)
all.het<-read.delim(all.het.name,header=TRUE)

delta-divergence

swfw.mu<-calc.mean.fst(vcf = vcf,pop.list1 = sw.list,pop.list2 = fw.list,maf.cutoff=0.01)
fwfw.mu<-calc.mean.fst(vcf = vcf,pop.list1 = fw.list,pop.list2 = fw.list, maf.cutoff=0.01)
deltad<-merge(swfw.mu,fwfw.mu,by="SNP")
 deltad<-deltad[,c("SNP","Chrom.x","Pos.x","Mean.Fst.x","Mean.Fst.y")]
colnames(deltad)<-c("SNP","Chrom","Pos","MeanSWFW.Fst","MeanFWFW.Fst")
deltad$deltad<-deltad$MeanSWFW.Fst - deltad$MeanFWFW.Fst
deltad<-deltad[!is.na(deltad$deltad),]#remove NAs
write.table(deltad, dd.name,sep='\t',col.names = TRUE,row.names = FALSE)
#' ```

This plotting function also generates the smoothed \(\delta\)-divergence values (which are not saved) and identifies the outliers (in 99% and 1% quantiles, and which are saved in sdd.out).

Jost’s D

First, I must calculate Jost’s D on each locus in the p4 dataset

sub.genind<-read.structure("stacks/subset.structure.stru",n.ind=698,
                           n.loc=9638,col.lab=1,col.pop=2,sep='\t',
                           row.marknames = 2,onerowperind=FALSE,ask=FALSE)
sub.genind@pop<-factor(gsub("sample_(\\w{4}).*","\\1",rownames(sub.genind@tab)))
jostd<-D_Jost(sub.genind) 
write.table(jostd$per.locus,"p4.jostd.perlocus.txt",sep='\t',col.names=FALSE,row.names = TRUE,quote=F)

Now this is done, so I don’t need to evaluate it again – I’ll just need to read it in from a file when I want to use it.

jostd<-read.delim(jostd.name,header=F)
colnames(jostd)<-c("locid","D")
jostd$SNP<-vcf$SNP
jostd$POS<-vcf$POS
jostd$Chr<-vcf$`#CHROM`
jostd$ID<-vcf$ID

Smooth the statistics

smoothed.name<-"p4_deltad_pi_het.png"
#assign the plotting positions relative to all loci
stacks.sig<-assign.plotpos(stacks.sig,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chr",df.bp="BP")
dd<-assign.plotpos(deltad,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos") 
pi<-assign.plotpos(all.pi,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos") 
ht<-assign.plotpos(all.het,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos") 
jd<-assign.plotpos(jostd,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chr",df.bp="POS") 
colnames(jd)<-c("locid","D","SNP","Pos","Chrom","ID","plot.pos") #standardize the names

smooth.per.chrom<-function(df,lgs,df.chrom,df.bp,df.stat,out.name,...){#span=0.1,degree=2
  smooth.all<-data.frame(stringsAsFactors = FALSE) #all smoothed values
  smooth.low<-data.frame(stringsAsFactors = FALSE) #lower outliers
  smooth.upp<-data.frame(stringsAsFactors = FALSE) #upper outliers
  for(i in 1:length(lgs)){
    this.chrom<-df[df[,df.chrom] %in% lgs[i],]
    this.smooth<-loess.smooth(this.chrom[,df.bp],this.chrom[,df.stat],...) 
    this.smooth<-data.frame(chr=cbind(rep(as.character(lgs[i]),length(this.smooth$x)),
                                  pos=as.numeric(this.smooth$x),
                                  smoothed.stats=as.numeric(this.smooth$y)),stringsAsFactors = FALSE)
    this.upp<-this.chrom[this.chrom[df.stat]>=quantile(this.chrom[,df.stat],0.99),]#choosing the upper outliers
    this.low<-this.chrom[this.chrom[,df.stat]<=quantile(this.chrom[,df.stat],0.01),]#choosing the lower outliers
                   
    # save the data
    smooth.all<-data.frame(rbind(smooth.all,this.smooth))
    smooth.low<-rbind(smooth.low,this.low)
    smooth.upp<-rbind(smooth.upp,this.upp)
  }
   
  smooth.upp$direction<-"upper" 
  smooth.low$direction<-"lower"
  smooth.out<-rbind(smooth.low,smooth.upp) # put the outliers in one df
  colnames(smooth.all)<-c("chr","pos","smoothed.stats")
  smooth.all$pos<-as.numeric(as.character(smooth.all$pos))
  smooth.all$smoothed.stats<-as.numeric(as.character(smooth.all$smoothed.stats))
  # save to files
  write.table(smooth.out,paste(out.name,"outliers.txt",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
  write.table(smooth.all,paste(out.name,"smoothed.txt",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
  smoothed<-list(smooth.out,smooth.all)
}

#smooth the statistics
dd.bp.smooth<-smooth.per.chrom(dd,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="deltad",out.name="dd.bp",span=.1,degree=2) 
pi.bp.smooth<-smooth.per.chrom(pi,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="Pi",out.name="pi.bp",span=0.1,degree=2) 
ht.bp.smooth<-smooth.per.chrom(ht,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="Het",out.name="ht.bp",span=0.1,degree=2)
jd.bp.smooth<-smooth.per.chrom(jd,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="D",out.name="jd.bp",span=0.1,degree=2)
#smooth the statistics
dd.pp.smooth<-smooth.per.chrom(dd,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="deltad",out.name="dd",span=0.1,degree=2)
pi.pp.smooth<-smooth.per.chrom(pi,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="Pi",out.name="pi",span=0.1,degree=2) 
ht.pp.smooth<-smooth.per.chrom(ht,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="Het",out.name="ht",span=0.1,degree=2) 
jd.pp.smooth<-smooth.per.chrom(jd,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="D",out.name="jd",span=0.1,degree=2) 
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
plot.smooth.stats<-function(df,smooth,stat.name,color,ylabel,...){
  dd<-fst.plot(fst.dat = df,...)
  mtext(ylabel,2,line=1.5,cex=0.75)
  points(x=as.numeric(as.character(smooth[[2]]$pos)),
         y=as.numeric(as.character(smooth[[2]]$smoothed.stats)),col=color,type="l",lwd=2)
  points(x=as.numeric(smooth[[2]]$pos),y=as.numeric(smooth[[2]]$smoothed.stats),col=color,type="l",lwd=2)
  points(smooth[[1]]$plot.pos[smooth[[1]]$direction=="upper"],smooth[[1]][smooth[[1]]$direction=="upper",stat.name],
         pch=24,bg=color,col=color,cex=0.5) #add  outliers
  points(smooth[[1]]$plot.pos[smooth[[1]]$direction=="lower"],smooth[[1]][smooth[[1]]$direction=="lower",stat.name],
         pch=25,bg=color,col=color,cex=0.5)
}
xlab.lgs<-function(df,lgs,lgn,chr,bp,...){
  last<-0
  for(i in 1:length(lgs)){
    text(x=median(df[df[,chr] ==lgs[i],bp]),labels=lgn[i],...)
    last<-max(df[df[,chr]  ==lgs[i],bp])
  }
}
png(smoothed.name,height=7,width=5,units="in",res=300)
par(mfrow=c(4,1),mar=c(1,1,1,1),oma=c(1,2,1,1),cex=0.75)
t<-plot.smooth.stats(df=dd,smooth=dd.pp.smooth,stat.name="deltad",
                     color=comp.col["deltad"],fst.name="deltad",bp.name="Pos",
                  ylabel=expression(paste(delta,"-divergence")),y.lim=c(-0.5,1),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(dd,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.5,cex=0.75)
#plot Jost's D
d<-plot.smooth.stats(df=jd,smooth=jd.pp.smooth,stat.name="D",
                     color=comp.col["D"],fst.name="D",chrom.name = "Chrom",bp.name="Pos",
                  ylabel=expression("Jost's"~italic(D)),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(jd,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.075,cex=0.75)
#plot pi
p<-plot.smooth.stats(df=pi,smooth=pi.pp.smooth,stat.name="Pi",
                     color=comp.col["pi"],fst.name="Pi",chrom.name = "Chrom",bp.name="Pos",
                  ylabel=expression(pi),y.lim=c(0,0.5),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(pi,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.05,cex=0.75)
#abline(v=stacks.sig$plot.pos,col="grey10") #stacks
#plot heterozygosity
h<-plot.smooth.stats(df=ht,smooth=ht.pp.smooth,stat.name="Het",
                     color=comp.col["Het"],fst.name="Het",chrom.name = "Chrom",bp.name="Pos",
                  ylabel=expression(italic(H)),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(ht,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.05,cex=0.75)



dev.off()
## png 
##   2
Figure x. Smoothed variables

Figure x. Smoothed variables

bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
all.out<-data.frame(rbind(cbind(paste(fw.sig.reg$Chr,fw.sig.reg$BP,sep="."),"fst"),
                          cbind(dd.bp.smooth[[1]]$SNP,"deltad"),
                          cbind(ht.bp.smooth[[1]]$SNP,"het"),
                          cbind(jd.bp.smooth[[1]]$SNP,"jostd"),
                          cbind(pi.bp.smooth[[1]]$SNP,"pi"),
                          cbind(sal.bf.sig$SNP,"bayenv_salinity")),stringsAsFactors = FALSE)
colnames(all.out)<-c("SNP","test")
write.table(all.out,"all_outliers.txt",sep='\t',quote=FALSE,col.names=TRUE,row.names=FALSE)

Use putative genes

Here, I compare the Fst outliers and \(\delta\)-divergence outliers to putative genes identified from other studies of teleosts adapting to freshwater environments.

#putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#genome annotations
gff.name<-"ssc_2016_12_20_chromlevel.gff.gz"
if(length(grep("gz",gff.name))>0){
  gff<-read.delim(gzfile(paste("../../scovelli_genome/",gff.name,sep="")),header=F)
} else{
  gff<-read.delim(paste("../../scovelli_genome/",gff.name,sep=""),header=F)
}
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
#ld info
ld<-read.delim("p4.ld_info_fwsw.txt")

Define some functions

These are updated from fwsw_analysis.R and are specific to these analyses, and are not generalizable!

ssc.fwsw.fstsig<-function(tx, la, al, fl, cutoff)
{
  tx.sig<-tx[tx$Fisher.s.P<cutoff,"Locus.ID"]
  la.sig<-la[la$Fisher.s.P<cutoff,"Locus.ID"]
  al.sig<-al[al$Fisher.s.P<cutoff,"Locus.ID"]
  fl.sig<-fl[fl$Fisher.s.P<cutoff,"Locus.ID"]
  length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
  length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
  length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
  all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
  fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
  tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
  #are they using the same SNPs or different SNPs?
  stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
  for(i in 1:nrow(fw.shared.chr)){
    tx.bp<-fwsw.tx[tx$Fisher.s.P<cutoff & tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    la.bp<-fwsw.la[la$Fisher.s.P<cutoff & la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    al.bp<-fwsw.al[al$Fisher.s.P<cutoff & al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    fl.bp<-fwsw.fl[fl$Fisher.s.P<cutoff & fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
    stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
    stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
    stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
  }
  colnames(stacks.sig)<-c("TX","LA","AL","FL")
  stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
  stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
  return(stacks.sig)
}

#' extract the significant regions from the gff file
sig.region.ann<-function(fw.shared.chr,gff)
{
  fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
    this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
    this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
    if(nrow(this.reg) == 0){
      if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
        new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                        region="beyond.last.contig", description=NA,SSCID=NA)
      }else{
        new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                        region=NA,description=NA,SSCID=NA)
      }
    }else{
      if(length(grep("SSCG\\d+",this.reg$attribute))>0){
        geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
        gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
      }else{
        geneID<-NA
        gene<-NA
      }
      new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                      region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
    }
    return(as.data.frame(new))
  }))
}

Combine putative genes and genomic info

#' extract the significant regions from the gff file
put.reg<-do.call(rbind,apply(put.genes,1,function(gene){
  ssc.gene<-as.character(gene[[6]])
  #there might be multiple matches
  ssc.genes<-unlist(strsplit(ssc.gene,","))
  #for each gene, match to gff
  gene.info<-do.call(rbind,lapply(ssc.genes,function(genename){
    this.gff<-gff[grep(genename,gff$attribute),]
    chrom<-unique(as.character(this.gff$seqname))
    start<-min(this.gff$start)
    stop<-max(this.gff$end)
    return(c(chrom,start,stop))
  }))
  if(as.character(gene[[3]]) == "") { gene[[3]]<-NA }
  new<-data.frame(Gene=rep(gene[[1]],length(gene.info[,1])),
                  Function=rep(gene[[2]],length(gene.info[,1])),
                  Chromsome=rep(gene[[3]],length(gene.info[,1])),
                  Species=rep(gene[[4]],length(gene.info[,1])),
                  Citation=rep(gene[[5]],length(gene.info[,1])),
                  Scovelli_geneID=rep(gene[[6]],length(gene.info[,1])),
                  Notes=rep(gene[[7]],length(gene.info[,1])),
                  Chrom=gene.info[,1],StartBP=gene.info[,2],StopBP=gene.info[,3],
                  stringsAsFactors = FALSE)
  return(as.data.frame(new))
}))
write.table(put.reg,"putative.gene.regions.tsv",col.names=T,sep='\t')

Note that some genes will have multiple rows as they match multiple locations in the genome.

Do the putative genes contain RAD loci?

put.reg$rad.loci<-apply(put.reg,1,function(gene){
  rad<-vcf[vcf$`#CHROM` %in% gene[["Chrom"]] & 
           vcf$POS >= as.numeric(as.character(gene[["StartBP"]])) & 
           vcf$POS <= as.numeric(as.character(gene[["StopBP"]])),]  
  if(nrow(rad)>0){ rad.snps<-paste(rad$SNP,sep=",",collapse=",") }
  else { rad.snps<-NA }
  return(rad.snps)
})

About 48.1818182% of the putative genes have RAD loci within them - but only about 22.037037% of all of the annotations have RAD loci.

Compare the Stacks outliers with putative genes

What is the maximum distance of loci in LD?

\(D'\) was calculated using a custom C++ program (found at https://github.com/spflanagan/SCA/tree/master/programs/pairwise_ld_vcf) using the formula:

\[D' = \sum_{i=1}^m\sum_{j=1}^n \frac{p_iq_j|D_{ij}|}{D_{max}},\] where \(m\) is the number of alleles at locus and \(n\) is the number of alleles at locus B. \(D_{ij}\) was calculated as \(f_{ij}-p_iq_j\), where \(f_{ij}\) is the frequency of the \(A_iB_j\) haplotype, \(p_i\) is the frequency of allele \(i\), and \(q_j\) is the frequency of allele \(j\). \(D_{max}\) is the lesser of \(p_iq_j\) or \((1-p_i)(1-q_j)\) when \(D_{ij} \lt 0\) and is the minimum of \((1-p_i)q_i\) or \(p_i(1-q_i)\) when \(D_{ij} \gt 0\).

In the data.frame ld, the locus IDs (LocIDA and LocIDB) are the positions on the chromosome.

ld$pos.diff<-abs(ld$LocIDA - ld$LocIDB)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)

Functions for comparison

#' Find outliers in a putative gene region
outlier.in.region<-function(outlier.df, region.df, oChrom="Chrom", oBP="BP",
                            rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
  out.in<-apply(region.df,1,function(put.gene){
    oin<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]] 
      & outlier.df[[oBP]] >= as.numeric(as.character(put.gene[[rBPStart]]))
      & outlier.df[[oBP]] <= as.numeric(as.character(put.gene[[rBPStop]])),"SNP"]
    if(length(oin)==0){ oin<-NA }
    return(paste(oin,sep=",",collapse=","))
  })
  return(out.in)
}
outlier.nearby<-function(outlier.df,region.df,ld.df,
                         oChrom="Chrom", oBP="BP",
                         rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
  out.nearby<-apply(region.df,1,function(put.gene){
    nearby<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]] 
      & outlier.df[[oBP]] >= (as.numeric(as.character(put.gene[[rBPStart]])) - (ld.df[put.gene[[rChrom]] ]))
      & outlier.df[[oBP]] <= (as.numeric(as.character(put.gene[[rBPStop]])) + (ld.df[put.gene[[rChrom]] ])),"SNP"]
    if(length(nearby)==0){ nearby<-NA }
    return(paste(nearby,sep=",",collapse=","))
  })
}

Prep regions for plotting

#' Get the plotting positions for the putative genes
all.chr<-data.frame(Chr=vcf$`#CHROM`,BP=vcf$POS,stringsAsFactors = F)
bounds<-data.frame(levels(as.factor(all.chr$Chr)),tapply(as.numeric(as.character(all.chr$BP)),all.chr$Chr,max))
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chrom]

#convert put.reg to the plotting BP
new.dat<-data.frame(stringsAsFactors = F)
last.max<-0
for(i in 1:length(plot.scaffs)){
  #pull out the data for this scaffold
  if(nrow(bounds[bounds$Chrom %in% plot.scaffs[i],])>0){ #sanity check
    chrom.dat<-put.reg[put.reg$Chrom %in% plot.scaffs[i],]
    if(nrow(chrom.dat)>0){
      chrom.dat$plot.min<-as.numeric(as.character(chrom.dat$StartBP))+last.max
      chrom.dat$plot.max<-as.numeric(as.character(chrom.dat$StopBP))+last.max
      new.dat<-rbind(new.dat,chrom.dat)
      #last.max<-max(chrom.dat$plot.pos)+
      #               as.numeric(scaffold.widths[scaffold.widths[,1] %in% scaffs.to.plot[i],2])
    }
    last.max<-last.max+
      as.numeric(bounds[bounds$Chrom %in% plot.scaffs[i],2])
  }else{
    print(paste(plot.scaffs[i], "has no designated width."))
  }
}
#make sure everything is the correct class
new.dat$plot.min<-as.numeric(as.character(new.dat$plot.min))
new.dat$plot.max<-as.numeric(as.character(new.dat$plot.max))

Plot the Fsts

Are outliers in putative gene regions?

fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
all.put.ssc<-as.character(put.reg$Scovelli_geneID)
all.put.ssc<-unlist(lapply(all.put.ssc,strsplit,","))
sig.put.match<-all.put.ssc[all.put.ssc %in% fw.sig.reg$SSCID]
print(dim(sig.put.match))
## NULL
#or use the functions
fw.sig.reg$SNP<-paste(fw.sig.reg$Chr,as.character(fw.sig.reg$BP),sep=".")
stacks.in<-outlier.in.region(fw.sig.reg,put.reg,oChrom="Chr")

Of the 54 shared outlier SNPs, 27 are in coding regions of the genome. However, 2 shared outliers are in putative gene regions.

No, they don’t overlap. Maybe they’re in the same LD region?

put.nearby.rad<-outlier.nearby(fw.sig.reg,put.reg,chrom.ld,oChrom = "Chr")

#add the nearby significant rad loci to the put.reg data.frame
put.reg$nearby.rad<-put.nearby.rad
head(put.reg[put.reg$nearby.rad!="NA",])
##     Gene
## 1  ABCB7
## 2 ABLIM3
## 3 ADAM19
## 4 ADAM19
## 6  ADRB2
## 7  ADRB2
##                                                                         Function
## 1                                                                iron metabolism
## 2                                                                  actin-binding
## 3                                                     osteoblast differentiation
## 4                                                     osteoblast differentiation
## 6                                                bone density and mineralization
## 7 bone density and mineralization, tooth organogenesis, craniofacial development
##   Chromsome                Species             Citation
## 1      <NA> Gasterosteus aculeatus    Jones et al 2012b
## 2      <NA> Gasterosteus aculeatus Hohenlohe et al 2010
## 3        IV Gasterosteus aculeatus Hohenlohe et al 2010
## 4        IV Gasterosteus aculeatus Hohenlohe et al 2010
## 6       VII Gasterosteus aculeatus Hohenlohe et al 2010
## 7        IV Gasterosteus aculeatus Hohenlohe et al 2010
##                   Scovelli_geneID Notes Chrom  StartBP   StopBP rad.loci
## 1                 SSCG00000000949  <NA>  LG14  5689975  5711169     <NA>
## 2                 SSCG00000004181  <NA>  LG14 12122996 12144915     <NA>
## 3 SSCG00000004162,SSCG00000007962  <NA>  LG14 11860301 11877333     <NA>
## 4 SSCG00000004162,SSCG00000007962  <NA>  LG14 22306738 22325075     <NA>
## 6                 SSCG00000004169  <NA>  LG14 12061583 12065881     <NA>
## 7                 SSCG00000004169  <NA>  LG14 12061583 12065881     <NA>
##                                            nearby.rad
## 1 LG14.3289190,LG14.3352883,LG14.3960463,LG14.5029044
## 2            LG14.5029044,LG14.13689654,LG14.18295431
## 3            LG14.5029044,LG14.13689654,LG14.18295431
## 4                                       LG14.18295431
## 6            LG14.5029044,LG14.13689654,LG14.18295431
## 7            LG14.5029044,LG14.13689654,LG14.18295431

So of the 540 gene annotations 229 are within the mean LD range of an outlier, which represent 67 putative freshwater genes. Those genes are: ABCB7, ABLIM3, ADAM19, ADRB2, AFAP1L1, angiotensin II receptor, ANXA6, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, AVPR2, BGN, CA, CAII, CAMKK1, CD63, CEBPD, CFTR, cortisol, ECaC, EDA, EFNB1, FBLN1, FERH1, FZD2, HRH2, IGFBP5, IGFBP6, IGF-I, IL12B, KCNH4, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NBC1, NCX, NHE, NKCC, NR4A1, PDLIM7, PMCA, PODXL, PRKCD, PRL, RDH10, RHOA, RHOGTP8, rtCR2, SCUBE1, SLC26A3, SLC2A13, SPG1, SYNPO, TBX2, TRIM14, UT, VATPase, WNT5A, WNT7B, ZEB1

Compare \(\delta\) -divergence to putative regions

Now for \(\delta\) -divergence

#in the gene
sdd.in<-outlier.in.region(dd.bp.smooth[[1]],put.reg,oBP = "Pos")
put.reg$sdd.in<-sdd.in

The genes ARHGEF3, OSBPL8, RHOGTP8 contain \(\delta\)-divergence outlier regions.

#in the gene
sdd.nb<-outlier.nearby(dd.bp.smooth[[1]],put.reg,chrom.ld,oBP = "Pos")
put.reg$sdd.nb<-sdd.nb

And of the 154 \(\delta\)-divergence outliers, 91.4814815% are in the LD region around putative freshwater genes.

Compare Bayenv outliers to putative regions

#taken directly from fwsw_analysis.R
bf<-read.delim("bayenv/p4.bf.txt",header=T)
bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
temp.bf.sig<-bf[bf$Temp_BF>bf.co["Temp_BF"],c(1,2,4,8,5,9)]
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
grass.bf.sig<-bf[bf$seagrass_BF>bf.co["seagrass_BF"],c(1,2,4,8,7,9)]
#get the log transformed Bayes Factors
bf$logSal<-log(bf$Salinity_BF)
bf$logTemp<-log(bf$Temp_BF)
bf$logSeagrass<-log(bf$seagrass_BF)

There are 0 overlapping outliers between temperature-, salinity-, and seagrass-associated loci.

But if we only care about salinity ones, there are 91 outliers.

Are any of the Bayenv salinity outliers near the putative freshwater genes?

bfs.in<-outlier.in.region(sal.bf.sig,put.reg,"scaffold")
bfs.nb<-outlier.nearby(sal.bf.sig,put.reg,chrom.ld,"scaffold")

Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.3703704% are in putative freshwater genes and 75.5555556% are within the LD neighborhood.

Are any of the Bayenv temperature outliers near putative freshwater genes?

bft.in<-outlier.in.region(temp.bf.sig,put.reg,"scaffold")
bft.nb<-outlier.nearby(temp.bf.sig,put.reg,chrom.ld,"scaffold")

Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.5555556% are in putative freshwater genes and 80.1851852% are within the LD neighborhood.

Are any of the loci associated with seagrass density in or near putative freshwater genes?

bfg.in<-outlier.in.region(grass.bf.sig,put.reg,"scaffold")
bfg.nb<-outlier.nearby(grass.bf.sig,put.reg,chrom.ld,"scaffold")

Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.1851852% are in putative freshwater genes and 77.962963% are within the LD neighborhood.

Which putative genes contain each of those?

put.reg$bfs.in<-bfs.in
put.reg$bft.in<-bft.in
put.reg$bfg.in<-bfg.in

unique(put.reg[put.reg$bfs.in !="NA","Gene"])
## [1] ARHGEF3 NHE    
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bft.in !="NA","Gene"])
## [1] APOL   NHE    TRIM14
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bfg.in !="NA","Gene"])
## [1] ARHGEF3
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1

All three BayEnv tests identified outliers in the genes . The temperature and grass also share NA, and temperature and salinity share NHE.

Jost’s D

Jost’s D measures the fraction of allelic differentiation between populations. According to Molecular Ecologist, “if allelic differentiation at a particular locus is the value of interest, it appears that D is the best measure”, so it might be useful to look at Jost’s D in the putatively freshwater genes.

jost.in<-outlier.in.region(jd.bp.smooth[[1]],put.reg,oChrom="Chrom",oBP="Pos")
jost.nb<-outlier.nearby(jd.bp.smooth[[1]],put.reg,chrom.ld,oChrom = "Chrom",oBP="pos")
put.reg$jostd.nb<-jost.nb
put.reg$jostd.in<-jost.in

So 1 loci are located within putative freshwater genes, but 0 are within the LD region of the putative genes, representing 0 of the putative gene annotations and 0 genes

Combining all of those analyses

*Genes containing shared Fst outliers: NA, AE, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, BMI1, CA, CA4, CAII, CAMKK1, CLINT1, cortisol, EFNB1, FBLN1, FLT4, IGFBP2, IGFBP5, IGFBP6, IL12B, KCNH4, Kir, Kir2.2, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NCX, NHE, OSBPL8, PDLIM7, PRKCD, PRL, RHOA, RHOGTP8, ROBO1, rtCR1, SCUBE1, SLC2A13, SOD3, SPG1, STAT3, SYNPO, TIMP3, TNS1, TRIM14, TSHB, VATPase, ZEB1

*Genes containing \(\delta\) -divergence outliers: ARHGEF3, OSBPL8, RHOGTP8

*Genes containing Salinity-associated outliers: ARHGEF3, NHE

*Genes containing Jost D outliers: NHE

*Overlap: ARHGEF3

*Divergence analyses:

*Near divergence analyses: NHE

But looking at put.reg[put.reg$Gene =="RHOGTP8","rad.loci"], it’s obvious that Rho GTPase-activating proteins are distributed throughout the genome (on chromosomes LG11, LG15, LG20, LG14, LG1, scaffold_1578, LG12, LG9, LG8, LG22, LG18, LG5, scaffold_1008, LG6, scaffold_950, LG10, LG7, scaffold_139, LG21, LG16, LG19).

`

Figure 6

Find genomic regions with high \(\pi\) and low \(\delta\)-divergence

#Do high pi and het have low deltad?
#do it per chrom
balancing.sel<-function(pi.out,pi,ht.out,ht,
                        dd.out,dd,jd.out,jd){
  pi.upp<-pi.out[pi.out$direction=="upper",]
  ht.upp<-ht.out[ht.out$direction=="upper",]
  dd.low<-dd.out[dd.out$direction=="lower",]
  jd.low<-jd.out[jd.out$direction=="lower",]
  shared.div<-pi.upp[pi.upp[,pi] %in% ht.upp[,ht],] 
  shared.dif<-dd.low[dd.low[,dd] %in% jd.low[,jd],] 
  bal<-shared.div[shared.div[,pi] %in% shared.dif[,dd]]
  return(list(div=shared.div,dif=shared.dif,bal=bal))
}

bal<-balancing.sel(pi.pp.smooth[[1]],"plot.pos",ht.pp.smooth[[1]],"plot.pos",
                   dd.pp.smooth[[1]],"plot.pos",jd.pp.smooth[[1]],"plot.pos")

shared.upp<-bal$div

There are loci with high \(\pi\) and low \(\delta\)-divergence on 13 chromosomes.

Instead of the lumped marine-freshwater \(F_{ST}\) values that I used originally, I’m going to plot the average of pairwise \(F_{ST}\) values.

fst.means<-fwsw.al
for(i in 1:nrow(fst.means)){
  if(fst.means$Locus.ID[i] %in% fwsw.fl$Locus.ID &
     fst.means$Locus.ID[i] %in% fwsw.la$Locus.ID &
     fst.means$Locus.ID[i] %in% fwsw.tx$Locus.ID){
      mu<-mean(fwsw.fl$Corrected.AMOVA.Fst[fwsw.fl$Locus.ID==fst.means$Locus.ID[i]],
               fwsw.la$Corrected.AMOVA.Fst[fwsw.la$Locus.ID==fst.means$Locus.ID[i]],
               fwsw.al$Corrected.AMOVA.Fst[fwsw.al$Locus.ID==fst.means$Locus.ID[i]],
               fwsw.tx$Corrected.AMOVA.Fst[fwsw.tx$Locus.ID==fst.means$Locus.ID[i]])
      fst.means$Corrected.AMOVA.Fst[i]<-mu
  }
  else{
    fst.means$Corrected.AMOVA.Fst[i]<-NA
  }
}
fst.means<-fst.means[!is.na(fst.means$Corrected.AMOVA.Fst),]
fwsw<-fst.means
fst.points<-FALSE
#and putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#genome annotations
#put.reg<-read.delim("putative.gene.regions.tsv",header=T)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)
#select genes of interest
#fav.genes<-c("AQP3","TNS1","CAMKK1","mucin","CAII","NAKATPase","ARHGEF3")
#fav.genes<-c("TNS1","CAII","TRIM14","VATPase")
fav.genes<-c("ARHGEF3", "mucin", "NHE", "TAAR")
genes2plot<-put.reg[put.reg$Gene %in% fav.genes,]
#shared Fst outliers
fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
h.pi.name<-"HandPi_subgenes.png"
row.settings<-c(4,4)
chroms2plot<-unique(shared.upp$Chr)

#generate xlims
xlims<-lapply(chroms2plot,function(lg,ld,genes,vcf){
  xs<-c(min(genes[genes$Chr == lg,"plot.pos"]-ld[lg]),
        max(genes[genes$Chr == lg,"plot.pos"]+ld[lg]))
  if(xs[1] < 0){
    xs[1]<-0
  }
  if(xs[2] > max(vcf$POS[vcf$`#CHROM`==lg])){
    xs[2]<-max(vcf$POS[vcf$`#CHROM`==lg])
  }
  return(xs)
},ld=chrom.ld,genes=shared.upp,vcf=vcf)
#or do full chroms
#generate xlims
xlims<-lapply(chroms2plot,function(lg,genes,vcf){
  xs<-c(min(vcf$POS[vcf$`#CHROM`==lg]),
        max(vcf$POS[vcf$`#CHROM`==lg]))
  return(xs)
},genes=shared.upp,vcf=vcf)
names(xlims)<-chroms2plot
#colors
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
#find the plotting location
nearest.pos<-function(stat.df,s.pos,s.stat,target,t.pos,t.stat){
  near.pos<-t(apply(stat.df,1,function(df,target){
    x<-as.numeric(df[s.pos])
    pos<-target[which.min(abs(x-as.numeric(target[,t.pos]))),t.pos]
    if(pos==x){
      stat<-as.numeric(target[which.min(abs(x-as.numeric(target[,t.pos]))),t.stat])
    }else{
      if(pos>x){
        upp<-which.min(abs(x-target[,t.pos]))
        low<-upp-1
      }else{
        low<-which.min(abs(x-target[,t.pos]))
        upp<-low+1
      }
      upp.pt<-target[upp,c(t.pos,t.stat)]
      low.pt<-target[low,c(t.pos,t.stat)]
      slope<-as.numeric((upp.pt[2]-low.pt[2])/(upp.pt[1]-low.pt[1]))
      b<-as.numeric(upp.pt[2]-(slope*upp.pt[1]))
      stat<-slope*x+b
    }
    return(cbind(x,stat))
  },target=target))
  return(near.pos)
}
upp.low.pts<-function(smooth,target,chrom,color,stat,pos.name,...){
  #smooth== smooth[[1]]
  #target== smooth[[2]]
  upp<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="upper",]
  low<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="lower",]
  upp.pts<-nearest.pos(stat.df=upp,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  low.pts<-nearest.pos(stat.df=low,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  points(x=upp.pts[,1],y=upp.pts[,2],pch=24,bg=color,col=color,...)
  points(x=low.pts[,1],y=low.pts[,2],pch=25,bg=color,col=color,...)
}
png(h.pi.name,height=8,width=14,units="in",res=300)
par(mfrow=row.settings,oma=c(1,1,1,1),mar=c(1,2,2,1))
for(i in 1:length(chroms2plot)){
  this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
  this.xlim<-xlims[[as.character(chroms2plot[i])]]
  plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
  xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  
  #the shared peaks
  p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
         function(pos){
           points(y=c(-0.2,0.5),x=c(pos,pos),
                  type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
         })
  
  points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
         x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
             shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
         type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
  #putative gene regions
  g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] & 
                  genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
  a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
  
  if(nrow(g) > 0){
    rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
       ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
  }
  
  #Fst
  if(fst.points==TRUE){
    points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
           col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
  }
  #Pi
  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",lwd=2,col=comp.col["pi"])
  upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["pi"],stat="Pi",pos.name="Pos")
  #Het
  points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["Het"],lwd=2)
  upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["Het"],stat="Het",pos.name="Pos")
  
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["deltad"],stat="deltad",pos.name="Pos")

  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["D"],stat="D",pos.name="Pos")
  
  #shared Fst outliers
  points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
         this.df$Corrected.AMOVA.Fst[this.df$BP %in% fw.sig.reg$BP],
         pch=8,cex=2,col="orchid4",lwd=3)
  if(i == 1){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs[3,"starts"]<-txt.locs[3,"starts"]-500000
    txt.locs[4,"starts"]<-txt.locs[4,"starts"]+500000
    txt.locs[6,"starts"]<-txt.locs[6,"starts"]-500000
    txt.locs[7,"starts"]<-txt.locs[7,"starts"]+500000
    txt.locs<-txt.locs[-5,]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)  
  }
  if(i == 4){
    if(nrow(g)>0){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs<-txt.locs[-4,]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T) 
    }
  }
  if(!i %in% c(1,4)){
    if(nrow(g)>0){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
    }
  }
  #axes etc
   axis(1,pos=-0.2,c(xmin,xmax),
       labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
  axis(2,las=1,hadj=0.75,cex.axis=2,pos=xmin,at=c(-0.25,0,0.25,0.5))
  mtext(chroms2plot[i],1,cex=2*0.75,line=1)
}
#par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),mar=c(0, 0, 0, 0), new=TRUE)
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c("Putative Gene",
                expression(Shared~italic(F)[ST]~Outlier),
                expression(Large~pi~and~italic(H))),
       bty='n',pch=c(15,8,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c("indianred","orchid4",alpha("#543005",0.75)))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c(expression(italic(H)),
                expression(pi),#expression(FW-SW~italic(F)[ST]),
                expression("Jost's"~italic(D)),
                expression(delta~-divergence)),
       bty='n',pch=c(15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c(comp.col[1:2],#alpha(col=comp.col["Fst"],0.25)
             comp.col[4:5]))
dev.off()
## png 
##   2
Figure 6. Low -divergence and high

Figure 6. Low -divergence and high

Look for regions of reduced diversity as evidence of selective sweeps.

upp.pi<-NULL
low.pi<-NULL
upp.het<-NULL
low.het<-NULL
upp.dd<-NULL
low.dd<-NULL
for(i in 1:length(lgs)){
  upp.pi<-rbind(upp.pi,avg.pi.adj[avg.pi.adj$Avg.Stat >= 
                     quantile(avg.pi.adj$Avg.Stat[avg.pi.adj$Chr %in% lgs[i]],0.95) &
                     avg.pi.adj$Chr %in% lgs[i],])
  low.pi<-rbind(low.pi,avg.pi.adj[avg.pi.adj$Avg.Stat <= 
                     quantile(avg.pi.adj$Avg.Stat[avg.pi.adj$Chr %in% lgs[i]],0.05) &
                     avg.pi.adj$Chr %in% lgs[i],])

  upp.het<-rbind(upp.het,avg.het.adj[avg.het.adj$Avg.Stat >= 
                       quantile(avg.het.adj$Avg.Stat[avg.het.adj$Chr %in% lgs[i]],0.95) &
                       avg.het.adj$Chr %in% lgs[i],])
  low.het<-rbind(low.het,avg.het.adj[avg.het.adj$Avg.Stat <= 
                       quantile(avg.het.adj$Avg.Stat[avg.het.adj$Chr %in% lgs[i]],0.05) &
                       avg.het.adj$Chr %in% lgs[i],])

  
}
shared.low<-low.pi[low.pi$plot.pos %in% low.het$plot.pos,]
shared.low$plot.min<-shared.low$Avg.Pos-250000
shared.low$plot.max<-shared.low$Avg.Pos+250000

h.pi.name<-"lowdiv_subgenes.png"
row.settings<-c(4,3)
chroms2plot<-unique(shared.low$Chr)
shared.upp<-shared.low
fst.points<-FALSE
#colors
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
#find the plotting location
nearest.pos<-function(stat.df,s.pos,s.stat,target,t.pos,t.stat){
  near.pos<-t(apply(stat.df,1,function(df,target){
    x<-as.numeric(df[s.pos])
    pos<-target[which.min(abs(x-as.numeric(target[,t.pos]))),t.pos]
    if(pos==x){
      stat<-as.numeric(target[which.min(abs(x-as.numeric(target[,t.pos]))),t.stat])
    }else{
      if(pos>x){
        upp<-which.min(abs(x-target[,t.pos]))
        low<-upp-1
      }else{
        low<-which.min(abs(x-target[,t.pos]))
        upp<-low+1
      }
      upp.pt<-target[upp,c(t.pos,t.stat)]
      low.pt<-target[low,c(t.pos,t.stat)]
      slope<-as.numeric((upp.pt[2]-low.pt[2])/(upp.pt[1]-low.pt[1]))
      b<-as.numeric(upp.pt[2]-(slope*upp.pt[1]))
      stat<-slope*x+b
    }
    return(cbind(x,stat))
  },target=target))
  return(near.pos)
}
upp.low.pts<-function(smooth,target,chrom,color,stat,pos.name,...){
  #smooth== smooth[[1]]
  #target== smooth[[2]]
  upp<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="upper",]
  low<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="lower",]
  upp.pts<-nearest.pos(stat.df=upp,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  low.pts<-nearest.pos(stat.df=low,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  points(x=upp.pts[,1],y=upp.pts[,2],pch=24,bg=color,col=color,...)
  points(x=low.pts[,1],y=low.pts[,2],pch=25,bg=color,col=color,...)
}
png(h.pi.name,height=8,width=14,units="in",res=300)
par(mfrow=row.settings,oma=c(1,1,1,1),mar=c(1,2,2,1))
for(i in 1:length(chroms2plot)){
  this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
  this.xlim<-xlims[[as.character(chroms2plot[i])]]
  plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
  xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  
  #the shared peaks
  p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
         function(pos){
           points(y=c(-0.2,0.5),x=c(pos,pos),
                  type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
         })
  
  points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
         x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
             shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
         type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
  #putative gene regions
  g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] & 
                  genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
  a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
  
  if(nrow(g) > 0){
    rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
       ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
  }
  
  #Fst
  if(fst.points==TRUE){
    points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
           col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
  }
  #Pi
  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",lwd=2,col=comp.col["pi"])
  upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["pi"],stat="Pi",pos.name="Pos")
  #Het
  points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["Het"],lwd=2)
  upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["Het"],stat="Het",pos.name="Pos")
  
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["deltad"],stat="deltad",pos.name="Pos")

  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["D"],stat="D",pos.name="Pos")
  
  #shared Fst outliers
  points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
         this.df$Corrected.AMOVA.Fst[this.df$BP %in% fw.sig.reg$BP],
         pch=8,cex=2,col="orchid4",lwd=3)
  if(i == 1){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs[3,"starts"]<-txt.locs[3,"starts"]-500000
    txt.locs[4,"starts"]<-txt.locs[4,"starts"]+500000
    txt.locs[6,"starts"]<-txt.locs[6,"starts"]-500000
    txt.locs[7,"starts"]<-txt.locs[7,"starts"]+500000
    txt.locs<-txt.locs[-5,]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)  
  }
  if(i == 4){
    if(nrow(g)>0){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs<-txt.locs[-4,]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T) 
    }
  }
  if(!i %in% c(1,4)){
    if(nrow(g)>0){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
    }
  }
  #axes etc
   axis(1,pos=-0.2,c(xmin,xmax),
       labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
  axis(2,las=1,hadj=0.75,cex.axis=2,pos=xmin,at=c(-0.25,0,0.25,0.5))
  mtext(chroms2plot[i],1,cex=2*0.75,line=1)
}
#par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),mar=c(0, 0, 0, 0), new=TRUE)
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c("Putative Gene",
                expression(Shared~italic(F)[ST]~Outlier),
                expression(Large~pi~and~italic(H))),
       bty='n',pch=c(15,8,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c("indianred","orchid4",alpha("#543005",0.75)))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c(expression(italic(H)),
                expression(pi),#expression(FW-SW~italic(F)[ST]),
                expression("Jost's"~italic(D)),
                expression(delta~-divergence)),
       bty='n',pch=c(15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c(comp.col[1:2],#alpha(col=comp.col["Fst"],0.25)
             comp.col[4:5]))
dev.off()
## png 
##   2
Low pi and low het

Low pi and low het

Test for selective sweeps

To look for selective sweeps, I’m going to use the FLK R code by Bonhomme et al, downloaded from https://qgsp.jouy.inra.fr/index.php?option=com_content&view=article&id=50&Itemid=55

The mandatory input is a matrix of allele frequencies for each population.

calc.allFreqs<-function(vcf,pop.list, pop.labs){
  allFreqs<-do.call(rbind,lapply(pop.list, function(pop){
    this.vcf<-cbind(vcf[,1:9],vcf$SNP,vcf[,grep(pop,colnames(vcf))])
    afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
    return(afs$RefFreq)
  }))
  colnames(allFreqs)<-vcf$SNP
  rownames(allFreqs)<-pop.labs
  return(allFreqs)
}

freqs<-calc.allFreqs(remove.missing.data(vcf, pop.list),pop.list,pop.labs)
write.table(freqs,"flk.freqs",row.names = TRUE)
source("../scripts/FLK.R")
library(ape); library(phangorn)
freqs<-read.table("flk.freqs",row.names=1)
## estimate the population tree using Reynolds distances
## computed on the SNP data, without specifying an outgroup
Fsnp<-Fij(freqs)

## Now compute the FLK and LK tests
tests<-FLK(freqs,Fsnp) 

Then I run the programs/FLKnull.py program on the output to estimate the empirical null distribution. Once that has run, we use the following code provided by Bonhomme et al to plot the null distribution

null<-read.table('envelope.txt',head=T)
plot(tests$Ht,tests$F.LK,pch=19,col="gray",xlab='Heterozygosity',ylab='FLK statistic',xlim=c(0,0.5))
lines(null$Ht,null$q0.005,type='l',col='navy')
lines(null$Ht,null$q0.995,type='l',col='navy')
lines(null$Ht,null$q0.025,type='l',col='navy')
lines(null$Ht,null$q0.975,type='l',col='navy')
lines(null$Ht,null$q0.5,type='l',col='navy')
lines(smooth.spline(null$Ht,null$q0.995))
lines(smooth.spline(null$Ht,null$q0.005))
lines(smooth.spline(null$Ht,null$q0.025),lty=2)
lines(smooth.spline(null$Ht,null$q0.975),lty=2)
lines(smooth.spline(null$Ht,null$q0.5),lty=3)

Use PopGenome to do the CLR method. First I need to convert my vcf file to a tabix-ed vcf file, using bgzip p4.upd.vcf followed by tabix -p vcf p4.upd.vcf.gz, and saved these in the popgenome/ directory.

mins<-tapply(X = vcf$POS,INDEX = vcf$`#CHROM`,min)[scaffs]
maxs<-tapply(X = vcf$POS,INDEX = vcf$`#CHROM`,max)[scaffs]
snps<-tapply(X = vcf$POS,INDEX = vcf$`#CHROM`,length)[scaffs]
library(PopGenome)
GENOME.class <- readVCF("popgenome/p4.upd.vcf.gz",numcols=snps["LG1"],tid="LG1",
                  frompos=mins["LG1"],topos=maxs["LG1"],
                  include.unknown = TRUE)
TXSP<-grep("TXSP",colnames(vcf),value = TRUE)
TXCC<-grep("TXCC",colnames(vcf),value = TRUE)
TXFW<-grep("TXFW",colnames(vcf),value = TRUE)
TXCB<-grep("TXCB",colnames(vcf),value = TRUE)
LAFW<-grep("LAFW",colnames(vcf),value = TRUE)
ALST<-grep("ALST",colnames(vcf),value = TRUE)
ALFW<-grep("ALFW",colnames(vcf),value = TRUE)
FLSG<-grep("FLSG",colnames(vcf),value = TRUE)
FLKB<-grep("FLKB",colnames(vcf),value = TRUE)
FLFD<-grep("FLFD",colnames(vcf),value = TRUE)
FLSI<-grep("FLSI",colnames(vcf),value = TRUE)
FLAB<-grep("FLAB",colnames(vcf),value = TRUE)
FLPB<-grep("FLPB",colnames(vcf),value = TRUE)
FLHB<-grep("FLHB",colnames(vcf),value = TRUE)
FLCC<-grep("FLCC",colnames(vcf),value = TRUE)
FLFW<-grep("FLFW",colnames(vcf),value = TRUE)
genvcf<-set.populations(genvcf,
                        list(TXSP,TXCC,TXFW,TXCB,LAFW,ALST,ALFW,FLSG,FLKB,FLFD,FLSI,FLAB,FLPB,FLHB,FLCC,FLFW),
                        diploid=TRUE)

Convert to sweepfinder2 format:

#one file for each chromosome
#first column is position
#second column is count of derived alleles
#third column is the total number of observed alleles
#fourth column indicates whether it's derived or ancestral. 

vcf2sfaf<-function(vcf,lgs){
  chrs<-lapply(lgs,function(lg){
    gts<-extract.gt.vcf(vcf[vcf$`#CHROM` == lg,])
    sf.af<-do.call(rbind,apply(gts,1,function(row){
      gt<-row[10:length(row)]
      ref<-(length(gt[gt=="0/0"])*2)+
        length(gt[gt=="0/1"])+length(gt[gt=="1/0"])
      alt<-(length(gt[gt=="1/1"])*2)+
        length(gt[gt=="0/1"])+length(gt[gt=="1/0"])
      return(data.frame(position=row["POS"],x=ref,n=alt))
    }))
    write.table(sf.af,paste("SF2/",lg,".AF.txt",sep=""),
                col.names = TRUE,row.names=FALSE,sep='\t',
                quote=FALSE,eol='\n')
    print(paste("Writing to file: SF2/",lg,".AF.txt",sep=""))
    return(sf.af)
  })
}
af<-vcf2sfaf(vcf,lgs)

Make separate files

header<-"##fileformat=VCFv4.2"
for(i in 1:length(lgs)){
  write.table(header,paste(lgs[i],".vcf",sep=""),quote=FALSE,
              col.names=FALSE,row.names=FALSE )
  write.table(vcf[vcf$`#CHROM`==lgs[i],-708],paste(lgs[i],".vcf",sep=""),
              col.names=TRUE,row.names=FALSE,quote=FALSE,sep='\t',append = TRUE)
}

Figure 7

I’m highlighting a few of the putative genes that have a bunch of outliers nearby or in them. First is TNS1, which matched three genome annotations but only had one region, on LG1, that contained shared outlier Fsts, delta divergence, Bayenv salinity, were near monophyletic neighbor joining trees.

fst.dfs<-list(fwsw.tx,fwsw.la,fwsw.al,fwsw.fl)
names(fst.dfs)<-c("TX FWSW","LA FWSW","AL FWSW","FL FWSW")
colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)

Plotting function

The plotting function gene.region.plot accpets a number of parameters.

gene.region.plot<-function(chrom,gene,put.reg,vcf,chrom.ld, fst.dfs,deltad=FALSE,D=FALSE,
                           colors="black",lwds=2,alphas=0.5,ltys=1,legend=TRUE,bases="bp", 
                           smooth=FALSE, smooth.loess=TRUE,fst.name="Corrected.AMOVA.FST",txt.cex=1,y.lim=c(0,1),...){
  pstart<-min(as.numeric(as.character(put.reg[put.reg$Gene ==gene & 
                                                     put.reg$Chrom==chrom,"StartBP"])))-(chrom.ld[chrom]/2)
  pstop<-max(as.numeric(as.character(put.reg[put.reg$Gene ==gene & 
                                                    put.reg$Chrom==chrom,"StartBP"])))+(chrom.ld[chrom]/2)
  if(pstart < 0){ pstart<- 0 }
  if(pstop > max(vcf[vcf$`#CHROM`==chrom,"POS"])){ pstop <- max(vcf[vcf$`#CHROM`==chrom,"POS"]) }
  starts<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StartBP"])
  stops<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StopBP"])
  
  if(smooth==TRUE){
    #generate the dataframes
    smooth.fsts<-lapply(fst.dfs,function(df){
      this.df<-df[df$Chr %in% chrom,]
      this.smooth<-do.call("rbind",lapply(seq(1,nrow(this.df),(nrow(this.df)*0.15)/5),sliding.avg,
                                        dat=data.frame(Pos=this.df$BP,Fst=this.df[,fst.name]),
                                        width=nrow(this.df)*0.15))
      return(this.smooth)
    })
  }  else{
    smooth.fsts<-lapply(fst.dfs,function(df){
      this.smooth<-df[df$Chr %in% chrom,c("BP",fst.name)]
    })
  }
  
  if(is.data.frame(deltad)){
    this.dd<-deltad[deltad$Chrom %in% chrom,]
    if(smooth.loess==TRUE){
      dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
      names(dd.smooth)<-c("pos","smoothed.stats")
    }else{
      dd.smooth<-this.dd
    }
  }else if(is.list(deltad)){
    dd.smooth<-deltad[[2]][deltad[[2]][,"chr"]%in%chrom,]
  }else{
    dd.smooth<-NULL
  }
  
  if(is.data.frame(D)){
    this.d<-D[D$Chr %in% chrom,]
    if(smooth.loess==TRUE) {
      dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2)
      names(dd.smooth)<-c("pos","smoothed.stats")
    }else{
      dsmooth<-this.d
    } 
  }else if(is.list(D)){
    dsmooth<-D[[2]][D[[2]][,"chr"]%in%chrom,]
  }else{
    D<-NULL
  }
  
  pr.gene<-put.reg[put.reg$Chrom==chrom & put.reg$Gene==gene,]
  fst.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$rad.loci[pr.gene$rad.loci != "NA"]),","))
  sal.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfs.in[pr.gene$bfs.in != "NA"]),","))
 # tmp.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bft.in[pr.gene$bft.in != "NA"]),","))
  #grs.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfg.in[pr.gene$bfg.in != "NA"]),","))
  #jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jostd.nb[jostd.nb != "NA"]),","))
  sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
 #nj.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nj.nb[pr.gene$nj.nb != "NA"]),",")))
  nj.gene<-NULL
  nb.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nearby.rad[pr.gene$nearby.rad != "NA"]),",")))
  gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
                       shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
  
  if(length(colors) != length(smooth.fsts)){
    colors<-rep(colors,length(smooth.fsts)/length(colors))
  }
  if(length(lwds) != length(smooth.fsts)){
    lwds<-rep(lwds,length(smooth.fsts)/length(lwds))
  }
  if(length(alphas) != length(smooth.fsts)){
    alphas<-rep(alphas,length(smooth.fsts)/length(alphas))
  }
  if(length(ltys) != length(smooth.fsts)){
    ltys<-rep(ltys,length(smooth.fsts)/length(ltys))
  }
  
  plot(smooth.fsts[[1]],type='n',ylim=c(y.lim[1],y.lim[2]+0.02),
       bty="L",xlab="",ylab="",xaxt='n',yaxt='n',xlim=c(pstart,pstop))
  axis(2,las=1,cex.axis=txt.cex)  
  xlabs<-c(pstart,pstop)
  if(bases %in% c("mb","MB","Mb")) xlabs<-c(round((pstart/1000000),2),round((pstop/1000000),2))
  if(bases %in% c("kb","KB","Kb")) xlabs<-c(round((pstart/1000),2),round((pstop/1000),2))
  axis(1,at=c(pstart,pstop),cex.axis=txt.cex,labels = xlabs)
  #add putative gene
  rect(xleft=as.numeric(starts),xright=as.numeric(stops),
         ybottom=y.lim[1]-0.04,ytop=y.lim[2],col="indianred",border="indianred")
  #add fsts
  mtext(chrom,1,cex=0.75*txt.cex,line = 1)
  for(i in 1:length(smooth.fsts)){
    points(smooth.fsts[[i]],col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
  }
  
  #add delta divergence
  if(is.data.frame(dd.smooth)){
    points(dd.smooth$pos,dd.smooth$smoothed.stats,col="#dfc27d",type="l",lwd=2)
  }
  if(is.list(deltad)){
    upp.low.pts(smooth=deltad[[1]],target=deltad[[2]],chrom=chrom,color=comp.col["deltad"],stat="deltad",pos.name="Pos")
  }
  
  #add Jost's D
  if(is.data.frame(dsmooth)){
    points(dsmooth$pos,dsmooth$smoothed.stats,type="l",col="#a6611a",lwd=2)
  }
  if(is.list(D)){
    upp.low.pts(smooth=D[[1]],target=D[[2]],chrom=chrom,color=comp.col["D"],stat="D",pos.name="Pos")
  }
  
  
  #are nj trees shown in scope of fig?
  njs.eval<-lapply(nj.gene,function(x){
    if(x >= pstart & x <= pstop) { eval = TRUE }
    else { eval = FALSE }
    return(eval)
  })
  
  #add delta d
  if(is.data.frame(deltad)){
    lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
    lgnd<-c(lgnd,expression(delta~-divergence))
  }else{
    lgnd<-unlist(lapply(names(fst.dfs),function(n) { substitute(paste(name,italic(F[ST]),sep=""),list(name=n)) }))

  }
  
  cols<-NULL
  pchs<-rep(32,length(lwds))
  for(i in 1:length(colors)){
    cols<-c(cols,alpha(colors[i],alphas[i]))
  }
  if(!is.null(deltad)){
    cols<-c(cols,"#dfc27d")
    pchs<-c(pchs,32)
    ltys<-c(ltys,1)
    lwds<-c(lwds,2)
  }
  if(!is.null(D)){
    cols<-c(cols,"#a6611a")
    pchs<-c(pchs,32)
    lgnd<-c(lgnd,"Jost's D")
    ltys<-c(ltys,1)
    lwds<-c(lwds,2)
  }
  
  lgnd<-c(lgnd,substitute(italic(g),list(g=gene)),"Outlier RAD loci")
  
  cols<-c(cols,"indianred","black")
  pchs<-c(pchs,15,124)
  if(length(grep(TRUE,njs.eval))>0){
    lgnd<-c(lgnd,"Monophyletic Gene Tree")
    cols<-c(cols,"#08519c")
    pchs<-c(pchs,124)
  }
  
  if(length(sal.gene)>0){
    lgnd<-c(lgnd,"Salinity-associated")
    cols<-c(cols,"black")
    pchs<-c(pchs,25)
  }
  if(legend==TRUE){
  legend("topleft",
         legend=lgnd,pch=pchs,
         bty='n',lwd=c(lwds,0,0,0,0),lty=c(ltys,0,0,0,0),
         col=cols,cex = txt.cex)
  }
  #add outlier loci
  clip(x1=min(as.numeric(pstart)),x2=max(as.numeric(pstop)),y1=y.lim[2]+.01,y2=y.lim[2]+0.2)
  abline(v=fst.gene,col="black")
  points(x=sal.gene,y=rep(y.lim[2]+0.05,length(sal.gene)),col="black",pch=25,cex=0.75*txt.cex,lwd=2)
  abline(v=nb.gene,col=alpha("black",0.5),cex=2)
  abline(v=nj.gene,col="#08519c",cex=2)
}

outside.legend<-function(...){
  opar <- par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), 
    mar=c(0, 0, 0, 0), new=TRUE)
  on.exit(par(opar))
  plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
  legend(...)
}

Plot all genes - for supplement

#set up legend parameters
leg.text<-c(expression(TX~FWSW~italic(F)[ST]),expression(LA~FWSW~italic(F)[ST]),
            expression(AL~FWSW~italic(F)[ST]),expression(FL~FWSW~italic(F)[ST]),
            expression(delta~-divergence),expression(Shared~italic(F)[ST]~Outliers),
            "Salinity-associated SNPs")
leg.pchs<-c(rep(32,5),124,25,124)
leg.ltys<-c(1,1,2,1,1,0,0,0)
leg.cols<-c(colors,"cornflowerblue","black","black","indianred")

png("ARGHEF3.png",height=8,width=10,units="in",res=300)       
par(mfrow=c(5,3),oma=c(2,2,3,2),mar=c(2,2,1,1))
arhgef3<-lapply(unique(put.reg[put.reg$Gene=="ARHGEF3" & put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
       gene.region.plot,gene="ARHGEF3",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(ARHGEF3))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()
## png 
##   2
png("mucin.png",height=8,width=10,units="in",res=300)       
par(mfrow=c(5,3),oma=c(2,2,3,2),mar=c(2,2,1,1))
mucin<-lapply(unique(put.reg[put.reg$Gene=="mucin"& put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
       gene.region.plot,gene="mucin",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(mucin))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()
## png 
##   2
png("NHE.png",height=4,width=6,units="in",res=300)       
par(mfrow=c(2,4),oma=c(2,2,3,2),mar=c(2,2,2,2))
nhe<-lapply(unique(put.reg[put.reg$Gene=="NHE" & put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
       gene.region.plot,gene="NHE",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(NHE))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()
## png 
##   2
png("TAAR.png",height=4,width=6,units="in",res=300)       
par(mfrow=c(1,2),oma=c(2,2,3,2),mar=c(2,2,2,2))
taar<-lapply(unique(put.reg[put.reg$Gene=="TAAR" & put.reg$Chrom%in%lgs,"Chrom"]), #& !is.na(put.reg$rad.loci)
       gene.region.plot,gene="TAAR",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(TAAR))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=.75,x.intersp=0,col=leg.cols)
dev.off()
## png 
##   2
Supplmental Fig. 5

Supplmental Fig. 5

Supplemental Fig. 6

Supplemental Fig. 6

Supplemental Fig. 7

Supplemental Fig. 7

Supplemental Fig. 8

Supplemental Fig. 8

Figure 7

colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)

png("Fig7candidateGenes.png",height=10,width=10,units="in",res=300)

par(mfrow=c(4,3),oma=c(2,2,4.5,2),mar=c(1,3,2,1))
#row1
gene.region.plot("LG1","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)

gene.region.plot("LG5","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)

gene.region.plot("LG11","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=3000000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)

#row2
gene.region.plot("LG2","mucin",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(mucin)),xpd=T,cex=2)

gene.region.plot("LG4","TAAR",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=15000000,y=0.5,expression(italic(TAAR)),xpd=T,cex=2)

gene.region.plot("LG7","TAAR",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=3000000,y=0.5,expression(italic(TAAR)),xpd=T,cex=2)

#row3
gene.region.plot("LG7","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=8500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG13","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=2200000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG14","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=6500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

#row4
gene.region.plot("LG18","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=5500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG19","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4000000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG20","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4000000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)


outside.legend("top",legend=c(expression(TX~FWSW~italic(F)[ST]),expression(LA~FWSW~italic(F)[ST]),
            expression(AL~FWSW~italic(F)[ST]),expression(FL~FWSW~italic(F)[ST]),
            expression(delta~-divergence),expression("Jost's"~italic(D)),
            expression(Shared~italic(F)[ST]~Outliers),
            "Salinity-associated SNPs","FW gene"),
            pch=c(rep(32,6),124,25,124),lty=c(1,1,2,1,1,1,0,0,0),lwd=2,
               bty='n',ncol=5,cex=1.25,x.intersp=0,
            col=c(colors,comp.col["deltad"],comp.col["D"],"black","black","indianred"))
# 
# par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
# plot(0:1.5, 0:1.5, type="n", xlab="", ylab="", axes=FALSE)
# legend(x=0.5,y=0.22,c(expression(Shared~italic(F)[ST]~Outliers),
#                 "Salinity-associated SNPs",
#                 "FW Candidate Gene"),
#        pch=c(124,25,124),lty=c(0,0,0),lwd=2,
#        col=c("black","black","indianred"),
#        bty='n',ncol=1,cex=2,x.intersp=-0.5,xpd=T)
# 
# 
# #add lines to the top
# par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
# plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
# legend("top",c(expression(TX~FWSW~italic(F)[ST]),
#                 expression(LA~FWSW~italic(F)[ST]),
#                 expression(AL~FWSW~italic(F)[ST]),
#                 expression(FL~FWSW~italic(F)[ST]),
#                 expression(delta~-divergence)),
#        pch=rep(32,5),lty=c(1,1,1,1,1),lwd=2,text.width=0.16,
#        col=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
#           "#dfc27d"),
#        bty='n',ncol=3,cex=2,x.intersp=0.5,y.intersp=0.75,xpd=T)
dev.off()
## png 
##   2
Fig. 7

Fig. 7

Figure 8: LG4

LG4 clearly is enriched for outliers, let’s look at it more closely, including putative FW genes.

#define a few things
chrom<-"LG4"
pchs<-c(rep(32,6),15,124,124)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
        "#dfc27d","#a6611a","indianred","black","#08519c")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,0)

#put together the data for LG4
#this.dd<-deltad[deltad$Chrom %in% chrom,]
#dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
#add Jost's D
#this.d<-jostd[jostd$Chr %in% chrom,]
#dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2) 
#get the outliers
pr.gene<-put.reg[put.reg$Chrom==chrom ,]
#rad loci 
fst.gene<- stacks.sig[stacks.sig$Chr==chrom,"BP"]
#none of the salinity-associated RAD loci are in or near genes on LG8, but we could plot them all anyway
sal.gene<-sal.bf.sig[sal.bf.sig$scaffold=="LG8","BP"]
#jost's d - none are in the putative genes
jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jost.in[jost.in != "NA"]),","))
#delta divergence outliers- none are in the putative genes
sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
#put all of them together with their associated shapes
gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
                     shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
#add putative genes
g<-put.reg[put.reg$Chrom %in% chrom,]
  g$Gene<-as.character(g$Gene)
  g$Gene[grep("ATP6",g$Gene)]<-"ATP6V" #standardize the names
  g<-g[,c("Gene","StartBP","StopBP")]
  g<-g[!duplicated(g$StartBP),]
  g<-g[order(g$StartBP),]
png("LG4.png",width=10,height=7,units="in",res=300)

nf <- layout(matrix(c(1,1,1,1,
                      2,2,2,2), nrow=2, byrow=TRUE))

### Plot all of LG4
par(oma=c(2.5,3,2,1),mar=c(1.5,1,2,1))
plot(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom],
     fst.dfs[[1]]$Smoothed.AMOVA.Fst[fst.dfs[[1]]$Chr %in% chrom],
     type='n',ylim=c(0,1),bty="L",xlab="",ylab="",xaxt='n',yaxt='n')
  axis(2,las=1,cex.axis=1.75)  
  axis(1,cex.axis=1.75,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3))
  #add fsts
  #mtext(chrom,1,cex=2*0.75,line=2.5)
  for(i in 1:length(fst.dfs)){
    points(fst.dfs[[i]]$BP[fst.dfs[[i]]$Chr %in% chrom],
           fst.dfs[[i]]$Smoothed.AMOVA.Fst[fst.dfs[[i]]$Chr %in% chrom],
           col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
  }
  #add delta-d and Jost's D
  #Pi
  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
       type="l",lwd=2,col=comp.col["pi"])
  upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],
              chrom=chrom,color=comp.col["pi"],stat="Pi",pos.name="Pos",cex=1.75)
  #Het
  points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
         type="l",col=comp.col["Het"],lwd=2)
  upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],
              chrom=chrom,color=comp.col["Het"],stat="Het",pos.name="Pos",cex=1.75)
  
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  upp.low.pts(smooth=dd.bp.smooth[[1]],target=dd.bp.smooth[[2]],
              chrom=chrom,color=comp.col["deltad"],stat="deltad",pos.name="Pos",cex=1.75)

  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  upp.low.pts(smooth=jd.bp.smooth[[1]],target=jd.bp.smooth[[2]],
              chrom=chrom,color=comp.col["D"],stat="D",pos.name="Pos",cex=1.75)
  
  
   #add the putative genes
  starts<-as.numeric(g[,"StartBP"])
  stops<-as.numeric(g[,"StopBP"])
  rect(xleft=as.numeric(starts),xright=as.numeric(stops),
         ybottom=-0.04,ytop=1,col="indianred",border="indianred")
  #add the ones that are spaced out
  text(x=g$StartBP[!g$Gene %in% c("SCUBE1","TRIM14")],y=0.5,
       labels=g$Gene[!g$Gene %in% c("SCUBE1","TRIM14")],font=2,srt=90,xpd=T,cex=1.75)
  text(x=g$StartBP[g$Gene == "SCUBE1"]-50000,y=0.8,
       labels=g$Gene[g$Gene =="SCUBE1"],font=2,srt=90,xpd=T,cex=1.75)
  text(x=g$StartBP[g$Gene == "TRIM14"]+50000,y=0.2,
       labels=g$Gene[g$Gene =="TRIM14"],font=2,srt=90,xpd=T,cex=1.75)
  #add outlier loci
  clip(x1=min(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),
       x2=max(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),y1=0.99,y2=1.06)
  abline(v=fst.gene,col="black",lwd=1.5)
  points(x=sal.gene,y=rep(1.05,length(sal.gene)),col="black",pch=25,cex=1)
  

### Plot the recombination rate
load("../../scovelli_genome/ssc_map.rda")
rec.cols<-c("#fdbb84","#fc8d59","#ef6548")
plot(set[["LG4"]]@interpolations$slidingwindow@physicalPositions,
     set[["LG4"]]@interpolations$slidingwindow@rates,type="l",bty="L",
     ylim=c(-5,25),xlab="",ylab="",xaxt='n',yaxt='n',lwd=2,col=rec.cols[1])
abline(h=0,lty=2)
axis(2,cex.axis=1.75,las=1)
axis(1,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3),cex.axis=1.75)
mtext("Position on LG4 (Mb)",1,line=2.2,cex=1.5*0.75)
mtext("Recombination Rate (cM/Mb)",2,line=2.5,cex=1.5*0.75)
lines(set[["LG4"]]@interpolations$spline@physicalPositions,
      set[["LG4"]]@interpolations$spline@rates,type="l",col=rec.cols[2],lwd=2)
lines(set[["LG4"]]@interpolations$loess@physicalPositions,
      set[["LG4"]]@interpolations$loess@rates,type="l",lwd=2,col=rec.cols[3])
legend("topleft",bty='n',col=rec.cols,
       lwd=2,c("Sliding Window","Spline","Loess"),cex=1.5)

  
### add legend
#create the legend text, and parameters
lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
lgnd<-c(lgnd,expression(delta~-divergence),"Jost's D","gene regions",
        expression("Shared"~italic(F[ST])~"outlier"),"Salinity-associated")
pchs<-c(rep(32,6),15,124,25)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
        "#dfc27d","#a6611a","indianred","black","black")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,1)
#plot the legend
par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), mar=c(0, 0, 0, 0), new=TRUE,xpd=TRUE)
plot(c(0,1),c(0,1),bty='n',type='n',xlab="",ylab="",xaxt='n',yaxt='n')
legend("top",legend=lgnd,pch=pchs,ncol=5,
       bty='n',lwd=lwds,lty=ltys,
       col=cols,xpd=TRUE,cex=1.5,
       x.intersp = 0.5,y.intersp = 0.75)
dev.off()
## png 
##   2
LG 4 Figure

LG 4 Figure